pax_global_header: comment=5945fc1945a4001537072e39f03725f944437834

==> yt-dlp-2024.09.27/.editorconfig <==
root = true

[**.py]
charset = utf-8
indent_size = 4
indent_style = space
trim_trailing_whitespace = true
insert_final_newline = true

==> yt-dlp-2024.09.27/.gitattributes <==
* text=auto
Makefile* text whitespace=-tab-in-indent
*.sh text eol=lf
*.md diff=markdown
*.py diff=python

==> yt-dlp-2024.09.27/.github/FUNDING.yml <==
# These are supported funding model platforms

github: # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2]
patreon: # Replace with a single Patreon username
open_collective: # Replace with a single Open Collective username
ko_fi: # Replace with a single Ko-fi username
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
liberapay: # Replace with a single Liberapay username
issuehunt: # Replace with a single IssueHunt username
otechie: # Replace with a single Otechie username
custom: ['https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators']

==> yt-dlp-2024.09.27/.github/ISSUE_TEMPLATE/1_broken_site.yml <==
name: Broken site support
description: Report issue with yt-dlp on a supported site
labels: [triage, site-bug]
body:
  - type: checkboxes
    attributes:
      label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
      description: Fill all fields even if you think it is irrelevant for the issue
      options:
        - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
          required: true
  - type: checkboxes
    id: checklist
    attributes:
      label: Checklist
      description: |
        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
      options:
        - label: I'm reporting that yt-dlp is broken on a **supported** site
          required: true
        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
          required: true
        - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
          required: true
        - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
          required: true
        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
          required: true
        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
          required: true
        - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required
  - type: input
    id: region
    attributes:
      label: Region
      description: Enter the country/region that the site is accessible from
      placeholder: India
  - type: textarea
    id: description
    attributes:
      label: Provide a description that is worded well enough to be understood
      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
      placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
    validations:
      required: true
  - type: checkboxes
    id: verbose
    attributes:
      label: Provide verbose output that clearly demonstrates the problem
      options:
        - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU <your command line>`)
          required: true
        - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead"
          required: false
        - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
          required: true
  - type: textarea
    id: log
    attributes:
      label: Complete Verbose Output
      description: |
        It should start like this:
      placeholder: |
        [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
        [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
        [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
        [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
        [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
        [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
        [debug] Proxy map: {}
        [debug] Request Handlers: urllib, requests
        [debug] Loaded 1893 extractors
        [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
        yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
        [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
      render: shell
    validations:
      required: true
  - type: markdown
    attributes:
      value: |
        > [!CAUTION]
        > ### GitHub is experiencing a high volume of malicious spam comments.
        > ### If you receive any replies asking you download a file, do NOT follow the download links!
        >
        > Note that this issue may be temporarily locked as an anti-spam measure after it is opened.
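The template above offers an API alternative to the `-vU` flag: passing `'verbose': True` in the `YoutubeDL` params. A minimal sketch of that invocation, using the example URL from the template; note the self-update part of `-U` applies only to the standalone executable, so there is no direct API equivalent for it:

# Produce the same [debug] verbose log the issue template asks for,
# but from the Python API instead of the command line.
from yt_dlp import YoutubeDL

opts = {
    'verbose': True,  # emits the '[debug] ...' lines that must be pasted into the issue
}
with YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=BaW_jenozKc'])
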
==> yt-dlp-2024.09.27/.github/ISSUE_TEMPLATE/2_site_support_request.yml <==
name: Site support request
description: Request support for a new site
labels: [triage, site-request]
body:
  - type: checkboxes
    attributes:
      label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
      description: Fill all fields even if you think it is irrelevant for the issue
      options:
        - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
          required: true
  - type: checkboxes
    id: checklist
    attributes:
      label: Checklist
      description: |
        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
      options:
        - label: I'm reporting a new site support request
          required: true
        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
          required: true
        - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
          required: true
        - label: I've checked that none of provided URLs [violate any copyrights](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge
          required: true
        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
          required: true
        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
          required: true
        - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share it if required
  - type: input
    id: region
    attributes:
      label: Region
      description: Enter the country/region that the site is accessible from
      placeholder: India
  - type: textarea
    id: example-urls
    attributes:
      label: Example URLs
      description: |
        Provide all kinds of example URLs for which support should be added
      placeholder: |
        - Single video: https://www.youtube.com/watch?v=BaW_jenozKc
        - Single video: https://youtu.be/BaW_jenozKc
        - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
    validations:
      required: true
  - type: textarea
    id: description
    attributes:
      label: Provide a description that is worded well enough to be understood
      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
      placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
    validations:
      required: true
  - type: checkboxes
    id: verbose
    attributes:
      label: Provide verbose output that clearly demonstrates the problem
      options:
        - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU <your command line>`)
          required: true
        - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead"
          required: false
        - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
          required: true
  - type: textarea
    id: log
    attributes:
      label: Complete Verbose Output
      description: |
        It should start like this:
      placeholder: |
        [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
        [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
        [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
        [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
        [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
        [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
        [debug] Proxy map: {}
        [debug] Request Handlers: urllib, requests
        [debug] Loaded 1893 extractors
        [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
        yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
        [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
      render: shell
    validations:
      required: true
  - type: markdown
    attributes:
      value: |
        > [!CAUTION]
        > ### GitHub is experiencing a high volume of malicious spam comments.
        > ### If you receive any replies asking you download a file, do NOT follow the download links!
        >
        > Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

==> yt-dlp-2024.09.27/.github/ISSUE_TEMPLATE/3_site_feature_request.yml <==
name: Site feature request
description: Request a new functionality for a supported site
labels: [triage, site-enhancement]
body:
  - type: checkboxes
    attributes:
      label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
      description: Fill all fields even if you think it is irrelevant for the issue
      options:
        - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
          required: true
  - type: checkboxes
    id: checklist
    attributes:
      label: Checklist
      description: |
        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
      options:
        - label: I'm requesting a site-specific feature
          required: true
        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
          required: true
        - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
          required: true
        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
          required: true
        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
          required: true
        - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required
  - type: input
    id: region
    attributes:
      label: Region
      description: Enter the country/region that the site is accessible from
      placeholder: India
  - type: textarea
    id: example-urls
    attributes:
      label: Example URLs
      description: |
        Example URLs that can be used to demonstrate the requested feature
      placeholder: |
        https://www.youtube.com/watch?v=BaW_jenozKc
    validations:
      required: true
  - type: textarea
    id: description
    attributes:
      label: Provide a description that is worded well enough to be understood
      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
      placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
    validations:
      required: true
  - type: checkboxes
    id: verbose
    attributes:
      label: Provide verbose output that clearly demonstrates the problem
      options:
        - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU <your command line>`)
          required: true
        - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead"
          required: false
        - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
          required: true
  - type: textarea
    id: log
    attributes:
      label: Complete Verbose Output
      description: |
        It should start like this:
      placeholder: |
        [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
        [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
        [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
        [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
        [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
        [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
        [debug] Proxy map: {}
        [debug] Request Handlers: urllib, requests
        [debug] Loaded 1893 extractors
        [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
        yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
        [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
      render: shell
    validations:
      required: true
  - type: markdown
    attributes:
      value: |
        > [!CAUTION]
        > ### GitHub is experiencing a high volume of malicious spam comments.
        > ### If you receive any replies asking you download a file, do NOT follow the download links!
        >
        > Note that this issue may be temporarily locked as an anti-spam measure after it is opened.
==> yt-dlp-2024.09.27/.github/ISSUE_TEMPLATE/4_bug_report.yml <==
name: Core bug report
description: Report a bug unrelated to any particular site or extractor
labels: [triage, bug]
body:
  - type: checkboxes
    attributes:
      label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
      description: Fill all fields even if you think it is irrelevant for the issue
      options:
        - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
          required: true
  - type: checkboxes
    id: checklist
    attributes:
      label: Checklist
      description: |
        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
      options:
        - label: I'm reporting a bug unrelated to a specific site
          required: true
        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
          required: true
        - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
          required: true
        - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
          required: true
        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
          required: true
        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
          required: true
  - type: textarea
    id: description
    attributes:
      label: Provide a description that is worded well enough to be understood
      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
      placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
    validations:
      required: true
  - type: checkboxes
    id: verbose
    attributes:
      label: Provide verbose output that clearly demonstrates the problem
      options:
        - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU <your command line>`)
          required: true
        - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead"
          required: false
        - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
          required: true
  - type: textarea
    id: log
    attributes:
      label: Complete Verbose Output
      description: |
        It should start like this:
      placeholder: |
        [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
        [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
        [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
        [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
        [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
        [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
        [debug] Proxy map: {}
        [debug] Request Handlers: urllib, requests
        [debug] Loaded 1893 extractors
        [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
        yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
        [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
      render: shell
    validations:
      required: true
  - type: markdown
    attributes:
      value: |
        > [!CAUTION]
        > ### GitHub is experiencing a high volume of malicious spam comments.
        > ### If you receive any replies asking you download a file, do NOT follow the download links!
        >
        > Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

==> yt-dlp-2024.09.27/.github/ISSUE_TEMPLATE/5_feature_request.yml <==
name: Feature request
description: Request a new functionality unrelated to any particular site or extractor
labels: [triage, enhancement]
body:
  - type: checkboxes
    attributes:
      label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
      description: Fill all fields even if you think it is irrelevant for the issue
      options:
        - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
          required: true
  - type: checkboxes
    id: checklist
    attributes:
      label: Checklist
      description: |
        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
      options:
        - label: I'm requesting a feature unrelated to a specific site
          required: true
        - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
          required: true
        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
          required: true
        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
          required: true
        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
          required: true
  - type: textarea
    id: description
    attributes:
      label: Provide a description that is worded well enough to be understood
      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
      placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
    validations:
      required: true
  - type: checkboxes
    id: verbose
    attributes:
      label: Provide verbose output that clearly demonstrates the problem
      options:
        - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU <your command line>`)
        - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead"
          required: false
        - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
  - type: textarea
    id: log
    attributes:
      label: Complete Verbose Output
      description: |
        It should start like this:
      placeholder: |
        [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
        [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
        [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
        [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
        [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
        [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
        [debug] Proxy map: {}
        [debug] Request Handlers: urllib, requests
        [debug] Loaded 1893 extractors
        [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
        yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
        [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
      render: shell
  - type: markdown
    attributes:
      value: |
        > [!CAUTION]
        > ### GitHub is experiencing a high volume of malicious spam comments.
        > ### If you receive any replies asking you download a file, do NOT follow the download links!
        >
        > Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

==> yt-dlp-2024.09.27/.github/ISSUE_TEMPLATE/6_question.yml <==
name: Ask question
description: Ask yt-dlp related question
labels: [question]
body:
  - type: checkboxes
    attributes:
      label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
      description: Fill all fields even if you think it is irrelevant for the issue
      options:
        - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field
          required: true
  - type: markdown
    attributes:
      value: |
        ### Make sure you are **only** asking a question and not reporting a bug or requesting a feature.
        If your question contains "isn't working" or "can you add", this is most likely the wrong template.
        If you are in doubt whether this is the right template, **USE ANOTHER TEMPLATE**!
  - type: checkboxes
    id: checklist
    attributes:
      label: Checklist
      description: |
        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
      options:
        - label: I'm asking a question and **not** reporting a bug or requesting a feature
          required: true
        - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
          required: true
        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
          required: true
        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
          required: true
        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
          required: true
  - type: textarea
    id: question
    attributes:
      label: Please make sure the question is worded well enough to be understood
      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
      placeholder: Provide any additional information and as much context and examples as possible
    validations:
      required: true
  - type: checkboxes
    id: verbose
    attributes:
      label: Provide verbose output that clearly demonstrates the problem
      options:
        - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU <your command line>`)
        - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead"
          required: false
        - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
  - type: textarea
    id: log
    attributes:
      label: Complete Verbose Output
      description: |
        It should start like this:
      placeholder: |
        [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
        [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
        [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
        [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
        [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
        [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
        [debug] Proxy map: {}
        [debug] Request Handlers: urllib, requests
        [debug] Loaded 1893 extractors
        [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
        yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
        [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
      render: shell
  - type: markdown
    attributes:
      value: |
        > [!CAUTION]
        > ### GitHub is experiencing a high volume of malicious spam comments.
        > ### If you receive any replies asking you download a file, do NOT follow the download links!
        >
        > Note that this issue may be temporarily locked as an anti-spam measure after it is opened.

==> yt-dlp-2024.09.27/.github/ISSUE_TEMPLATE/config.yml <==
blank_issues_enabled: false
contact_links:
  - name: Get help from the community on Discord
    url: https://discord.gg/H5MNcFW63r
    about: Join the yt-dlp Discord for community-powered support!
  - name: Matrix Bridge to the Discord server
    url: https://matrix.to/#/#yt-dlp:matrix.org
    about: For those who do not want to use Discord

==> yt-dlp-2024.09.27/.github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml <==
name: Broken site support
description: Report issue with yt-dlp on a supported site
labels: [triage, site-bug]
body:
  %(no_skip)s
  - type: checkboxes
    id: checklist
    attributes:
      label: Checklist
      description: |
        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
      options:
        - label: I'm reporting that yt-dlp is broken on a **supported** site
          required: true
        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
          required: true
        - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
          required: true
        - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
          required: true
        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
          required: true
        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
          required: true
        - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required
  - type: input
    id: region
    attributes:
      label: Region
      description: Enter the country/region that the site is accessible from
      placeholder: India
  - type: textarea
    id: description
    attributes:
      label: Provide a description that is worded well enough to be understood
      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
      placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
    validations:
      required: true
  %(verbose)s

==> yt-dlp-2024.09.27/.github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml <==
name: Site support request
description: Request support for a new site
labels: [triage, site-request]
body:
  %(no_skip)s
  - type: checkboxes
    id: checklist
    attributes:
      label: Checklist
      description: |
        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
      options:
        - label: I'm reporting a new site support request
          required: true
        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
          required: true
        - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
          required: true
        - label: I've checked that none of provided URLs [violate any copyrights](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy) or contain any [DRM](https://en.wikipedia.org/wiki/Digital_rights_management) to the best of my knowledge
          required: true
        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
          required: true
        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
          required: true
        - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and am willing to share it if required
  - type: input
    id: region
    attributes:
      label: Region
      description: Enter the country/region that the site is accessible from
      placeholder: India
  - type: textarea
    id: example-urls
    attributes:
      label: Example URLs
      description: |
        Provide all kinds of example URLs for which support should be added
      placeholder: |
        - Single video: https://www.youtube.com/watch?v=BaW_jenozKc
        - Single video: https://youtu.be/BaW_jenozKc
        - Playlist: https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc
    validations:
      required: true
  - type: textarea
    id: description
    attributes:
      label: Provide a description that is worded well enough to be understood
      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
      placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
    validations:
      required: true
  %(verbose)s

==> yt-dlp-2024.09.27/.github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml <==
name: Site feature request
description: Request a new functionality for a supported site
labels: [triage, site-enhancement]
body:
  %(no_skip)s
  - type: checkboxes
    id: checklist
    attributes:
      label: Checklist
      description: |
        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
      options:
        - label: I'm requesting a site-specific feature
          required: true
        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
          required: true
        - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
          required: true
        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
          required: true
        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
          required: true
        - label: I've read about [sharing account credentials](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#are-you-willing-to-share-account-details-if-needed) and I'm willing to share it if required
  - type: input
    id: region
    attributes:
      label: Region
      description: Enter the country/region that the site is accessible from
      placeholder: India
  - type: textarea
    id: example-urls
    attributes:
      label: Example URLs
      description: |
        Example URLs that can be used to demonstrate the requested feature
      placeholder: |
        https://www.youtube.com/watch?v=BaW_jenozKc
    validations:
      required: true
  - type: textarea
    id: description
    attributes:
      label: Provide a description that is worded well enough to be understood
      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
      placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
    validations:
      required: true
  %(verbose)s

==> yt-dlp-2024.09.27/.github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml <==
name: Core bug report
description: Report a bug unrelated to any particular site or extractor
labels: [triage, bug]
body:
  %(no_skip)s
  - type: checkboxes
    id: checklist
    attributes:
      label: Checklist
      description: |
        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
      options:
        - label: I'm reporting a bug unrelated to a specific site
          required: true
        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
          required: true
        - label: I've checked that all provided URLs are playable in a browser with the same IP and same login details
          required: true
        - label: I've checked that all URLs and arguments with special characters are [properly quoted or escaped](https://github.com/yt-dlp/yt-dlp/wiki/FAQ#video-url-contains-an-ampersand--and-im-getting-some-strange-output-1-2839-or-v-is-not-recognized-as-an-internal-or-external-command)
          required: true
        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
          required: true
        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
          required: true
  - type: textarea
    id: description
    attributes:
      label: Provide a description that is worded well enough to be understood
      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
      placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
    validations:
      required: true
  %(verbose)s

==> yt-dlp-2024.09.27/.github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml <==
name: Feature request
description: Request a new functionality unrelated to any particular site or extractor
labels: [triage, enhancement]
body:
  %(no_skip)s
  - type: checkboxes
    id: checklist
    attributes:
      label: Checklist
      description: |
        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
      options:
        - label: I'm requesting a feature unrelated to a specific site
          required: true
        - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
          required: true
        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
          required: true
        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar issues **including closed ones**. DO NOT post duplicates
          required: true
        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
          required: true
  - type: textarea
    id: description
    attributes:
      label: Provide a description that is worded well enough to be understood
      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
      placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible
    validations:
      required: true
  %(verbose_optional)s

==> yt-dlp-2024.09.27/.github/ISSUE_TEMPLATE_tmpl/6_question.yml <==
name: Ask question
description: Ask yt-dlp related question
labels: [question]
body:
  %(no_skip)s
  - type: markdown
    attributes:
      value: |
        ### Make sure you are **only** asking a question and not reporting a bug or requesting a feature.
        If your question contains "isn't working" or "can you add", this is most likely the wrong template.
        If you are in doubt whether this is the right template, **USE ANOTHER TEMPLATE**!
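The `%(no_skip)s`, `%(verbose)s`, and `%(verbose_optional)s` markers in the `ISSUE_TEMPLATE_tmpl` files are printf-style placeholders: a generator script (the repo ships one under `devscripts/`, whose exact behavior differs in detail) expands them with shared YAML fragments to produce the finished `ISSUE_TEMPLATE` forms shown earlier. A minimal sketch of that substitution; the fragment strings here are illustrative stand-ins, not the project's real shared blocks:

# Sketch of rendering ISSUE_TEMPLATE_tmpl/*.yml into ISSUE_TEMPLATE/*.yml
# via Python's printf-style mapping formatting. NO_SKIP / VERBOSE below are
# hypothetical placeholders for the real shared YAML fragments.
from pathlib import Path

NO_SKIP = '  # ... shared "DO NOT REMOVE OR SKIP" checkbox block ...'
VERBOSE = '  # ... shared "Provide verbose output" + log textarea block ...'

for tmpl in Path('.github/ISSUE_TEMPLATE_tmpl').glob('*.yml'):
    rendered = tmpl.read_text() % {
        'no_skip': NO_SKIP,
        'verbose': VERBOSE,            # variant where the checkboxes are required
        'verbose_optional': VERBOSE,   # variant where they are optional
    }
    Path('.github/ISSUE_TEMPLATE', tmpl.name).write_text(rendered)
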
  - type: checkboxes
    id: checklist
    attributes:
      label: Checklist
      description: |
        Carefully read and work through this check list in order to prevent the most common mistakes and misuse of yt-dlp:
      options:
        - label: I'm asking a question and **not** reporting a bug or requesting a feature
          required: true
        - label: I've looked through the [README](https://github.com/yt-dlp/yt-dlp#readme)
          required: true
        - label: I've verified that I have **updated yt-dlp to nightly or master** ([update instructions](https://github.com/yt-dlp/yt-dlp#update-channels))
          required: true
        - label: I've searched [known issues](https://github.com/yt-dlp/yt-dlp/issues/3766) and the [bugtracker](https://github.com/yt-dlp/yt-dlp/issues?q=) for similar questions **including closed ones**. DO NOT post duplicates
          required: true
        - label: I've read the [guidelines for opening an issue](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#opening-an-issue)
          required: true
  - type: textarea
    id: question
    attributes:
      label: Please make sure the question is worded well enough to be understood
      description: See [is-the-description-of-the-issue-itself-sufficient](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-description-of-the-issue-itself-sufficient)
      placeholder: Provide any additional information and as much context and examples as possible
    validations:
      required: true
  %(verbose_optional)s

==> yt-dlp-2024.09.27/.github/PULL_REQUEST_TEMPLATE.md <==
**IMPORTANT**: PRs without the template will be CLOSED

### Description of your *pull request* and other information

ADD DESCRIPTION HERE

Fixes #

<details open><summary>Template</summary>

### Before submitting a *pull request* make sure you have:
- [ ] At least skimmed through [contributing guidelines](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) including [yt-dlp coding conventions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#yt-dlp-coding-conventions)
- [ ] [Searched](https://github.com/yt-dlp/yt-dlp/search?q=is%3Apr&type=Issues) the bugtracker for similar pull requests

### In order to be accepted and merged into yt-dlp each piece of code must be in public domain or released under [Unlicense](http://unlicense.org/). Check all of the following options that apply:
- [ ] I am the original author of this code and I am willing to release it under [Unlicense](http://unlicense.org/)
- [ ] I am not the original author of this code but it is in public domain or released under [Unlicense](http://unlicense.org/) (provide reliable evidence)

### What is the purpose of your *pull request*?
- [ ] Fix or improvement to an extractor (Make sure to add/update tests)
- [ ] New extractor ([Piracy websites will not be accepted](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#is-the-website-primarily-used-for-piracy))
- [ ] Core bug fix/improvement
- [ ] New feature (It is strongly [recommended to open an issue first](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#adding-new-feature-or-making-overarching-changes))

</details>
==> yt-dlp-2024.09.27/.github/banner.svg <==
(SVG image; its content was not preserved in this text dump)

==> yt-dlp-2024.09.27/.github/workflows/build.yml <==
name: Build Artifacts
on:
  workflow_call:
    inputs:
      version:
        required: true
        type: string
      channel:
        required: false
        default: stable
        type: string
      unix:
        default: true
        type: boolean
      linux_static:
        default: true
        type: boolean
      linux_arm:
        default: true
        type: boolean
      macos:
        default: true
        type: boolean
      macos_legacy:
        default: true
        type: boolean
      windows:
        default: true
        type: boolean
      windows32:
        default: true
        type: boolean
      origin:
        required: false
        default: ''
        type: string
    secrets:
      GPG_SIGNING_KEY:
        required: false

  workflow_dispatch:
    inputs:
      version:
        description: |
          VERSION: yyyy.mm.dd[.rev] or rev
        required: true
        type: string
      channel:
        description: |
          SOURCE of this build's updates: stable/nightly/master/
        required: true
        default: stable
        type: string
      unix:
        description: yt-dlp, yt-dlp.tar.gz
        default: true
        type: boolean
      linux_static:
        description: yt-dlp_linux
        default: true
        type: boolean
      linux_arm:
        description: yt-dlp_linux_aarch64, yt-dlp_linux_armv7l
        default: true
        type: boolean
      macos:
        description: yt-dlp_macos, yt-dlp_macos.zip
        default: true
        type: boolean
      macos_legacy:
        description: yt-dlp_macos_legacy
        default: true
        type: boolean
      windows:
        description: yt-dlp.exe, yt-dlp_min.exe, yt-dlp_win.zip
        default: true
        type: boolean
      windows32:
        description: yt-dlp_x86.exe
        default: true
        type: boolean
      origin:
        description: Origin
        required: false
        default: 'current repo'
        type: choice
        options:
          - 'current repo'

permissions:
  contents: read

jobs:
  process:
    runs-on: ubuntu-latest
    outputs:
      origin: ${{ steps.process_origin.outputs.origin }}
    steps:
      - name: Process origin
        id: process_origin
        run: |
          echo "origin=${{ inputs.origin == 'current repo' && github.repository || inputs.origin }}" | tee "$GITHUB_OUTPUT"

  unix:
    needs: process
    if: inputs.unix
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0  # Needed for changelog
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Install Requirements
        run: |
          sudo apt -y install zip pandoc man sed
      - name: Prepare
        run: |
          python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
          python devscripts/update_changelog.py -vv
          python devscripts/make_lazy_extractors.py
      - name: Build Unix platform-independent binary
        run: |
          make all tar
      - name: Verify --update-to
        if: vars.UPDATE_TO_VERIFICATION
        run: |
          chmod +x ./yt-dlp
          cp ./yt-dlp ./yt-dlp_downgraded
          version="$(./yt-dlp --version)"
          ./yt-dlp_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
          downgraded_version="$(./yt-dlp_downgraded --version)"
          [[ "$version" != "$downgraded_version" ]]
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: build-bin-${{ github.job }}
          path: |
            yt-dlp
            yt-dlp.tar.gz
          compression-level: 0

  linux_static:
    needs: process
    if: inputs.linux_static
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Build static executable
        env:
          channel: ${{ inputs.channel }}
          origin: ${{ needs.process.outputs.origin }}
          version: ${{ inputs.version }}
        run: |
          mkdir ~/build
          cd bundle/docker
          docker compose up --build static
          sudo chown "${USER}:docker" ~/build/yt-dlp_linux
      - name: Verify --update-to
        if: vars.UPDATE_TO_VERIFICATION
        run: |
          chmod +x ~/build/yt-dlp_linux
          cp ~/build/yt-dlp_linux ~/build/yt-dlp_linux_downgraded
          version="$(~/build/yt-dlp_linux --version)"
          ~/build/yt-dlp_linux_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
          downgraded_version="$(~/build/yt-dlp_linux_downgraded --version)"
          [[ "$version" != "$downgraded_version" ]]
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: build-bin-${{ github.job }}
          path: |
            ~/build/yt-dlp_linux
          compression-level: 0

  linux_arm:
    needs: process
    if: inputs.linux_arm
    permissions:
      contents: read
      packages: write  # for creating cache
    runs-on: ubuntu-latest
    strategy:
      matrix:
        architecture:
          - armv7
          - aarch64
    steps:
      - uses: actions/checkout@v4
        with:
          path: ./repo
      - name: Virtualized Install, Prepare & Build
        uses: yt-dlp/run-on-arch-action@v2
        with:
          # Ref: https://github.com/uraimo/run-on-arch-action/issues/55
          env: |
            GITHUB_WORKFLOW: build
          githubToken: ${{ github.token }}  # To cache image
          arch: ${{ matrix.architecture }}
          distro: ubuntu18.04  # Standalone executable should be built on minimum supported OS
          dockerRunArgs: --volume "${PWD}/repo:/repo"
          install: |  # Installing Python 3.10 from the Deadsnakes repo raises errors
            apt update
            apt -y install zlib1g-dev libffi-dev python3.8 python3.8-dev python3.8-distutils python3-pip
            python3.8 -m pip install -U pip setuptools wheel
            # Cannot access any files from the repo directory at this stage
            python3.8 -m pip install -U Pyinstaller mutagen pycryptodomex websockets brotli certifi secretstorage cffi
          run: |
            cd repo
            python3.8 devscripts/install_deps.py -o --include build
            python3.8 devscripts/install_deps.py --include pyinstaller --include secretstorage  # Cached version may be out of date
            python3.8 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
            python3.8 devscripts/make_lazy_extractors.py
            python3.8 -m bundle.pyinstaller
            if ${{ vars.UPDATE_TO_VERIFICATION && 'true' || 'false' }}; then
              arch="${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}"
              chmod +x ./dist/yt-dlp_linux_${arch}
              cp ./dist/yt-dlp_linux_${arch} ./dist/yt-dlp_linux_${arch}_downgraded
              version="$(./dist/yt-dlp_linux_${arch} --version)"
              ./dist/yt-dlp_linux_${arch}_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
              downgraded_version="$(./dist/yt-dlp_linux_${arch}_downgraded --version)"
              [[ "$version" != "$downgraded_version" ]]
            fi
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: build-bin-linux_${{ matrix.architecture }}
          path: |  # run-on-arch-action designates armv7l as armv7
            repo/dist/yt-dlp_linux_${{ (matrix.architecture == 'armv7' && 'armv7l') || matrix.architecture }}
          compression-level: 0

  macos:
    needs: process
    if: inputs.macos
    permissions:
      contents: read
      actions: write  # For cleaning up cache
    runs-on: macos-12
    steps:
      - uses: actions/checkout@v4
      # NB: Building universal2 does not work with python from actions/setup-python
      - name: Restore cached requirements
        id: restore-cache
        uses: actions/cache/restore@v4
        env:
          SEGMENT_DOWNLOAD_TIMEOUT_MINS: 1
        with:
          path: |
            ~/yt-dlp-build-venv
          key: cache-reqs-${{ github.job }}
      - name: Install Requirements
        run: |
          brew install coreutils
          python3 -m venv ~/yt-dlp-build-venv
          source ~/yt-dlp-build-venv/bin/activate
          python3 devscripts/install_deps.py -o --include build
          python3 devscripts/install_deps.py --print --include pyinstaller > requirements.txt
          # We need to ignore wheels otherwise we break universal2 builds
          python3 -m pip install -U --no-binary :all: -r requirements.txt
          # We need to fuse our own universal2 wheels for curl_cffi
          python3 -m pip install -U 'delocate==0.11.0'
          mkdir curl_cffi_whls curl_cffi_universal2
          python3 devscripts/install_deps.py --print -o --include curl-cffi > requirements.txt
          for platform in "macosx_11_0_arm64" "macosx_11_0_x86_64"; do
            python3 -m pip download \
              --only-binary=:all: \
              --platform "${platform}" \
              -d curl_cffi_whls \
              -r requirements.txt
          done
          ( # Overwrite x86_64-only libs with fat/universal2 libs or else Pyinstaller will do the opposite
            # See https://github.com/yt-dlp/yt-dlp/pull/10069
            cd curl_cffi_whls
            mkdir -p curl_cffi/.dylibs
            python_libdir=$(python3 -c 'import sys; from pathlib import Path; print(Path(sys.path[1]).parent)')
            for dylib in lib{ssl,crypto}.3.dylib; do
              cp "${python_libdir}/${dylib}" "curl_cffi/.dylibs/${dylib}"
              for wheel in curl_cffi*macos*x86_64.whl; do
                zip "${wheel}" "curl_cffi/.dylibs/${dylib}"
              done
            done
          )
          python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/curl_cffi*.whl -w curl_cffi_universal2
          python3 -m delocate.cmd.delocate_fuse curl_cffi_whls/cffi*.whl -w curl_cffi_universal2
          for wheel in curl_cffi_universal2/*cffi*.whl; do
            mv -n -- "${wheel}" "${wheel/x86_64/universal2}"
          done
          python3 -m pip install --force-reinstall -U curl_cffi_universal2/*cffi*.whl
      - name: Prepare
        run: |
          python3 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
          python3 devscripts/make_lazy_extractors.py
      - name: Build
        run: |
          source ~/yt-dlp-build-venv/bin/activate
          python3 -m bundle.pyinstaller --target-architecture universal2 --onedir
          (cd ./dist/yt-dlp_macos && zip -r ../yt-dlp_macos.zip .)
          python3 -m bundle.pyinstaller --target-architecture universal2
      - name: Verify --update-to
        if: vars.UPDATE_TO_VERIFICATION
        run: |
          chmod +x ./dist/yt-dlp_macos
          cp ./dist/yt-dlp_macos ./dist/yt-dlp_macos_downgraded
          version="$(./dist/yt-dlp_macos --version)"
          ./dist/yt-dlp_macos_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
          downgraded_version="$(./dist/yt-dlp_macos_downgraded --version)"
          [[ "$version" != "$downgraded_version" ]]
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: build-bin-${{ github.job }}
          path: |
            dist/yt-dlp_macos
            dist/yt-dlp_macos.zip
          compression-level: 0
      - name: Cleanup cache
        if: steps.restore-cache.outputs.cache-hit == 'true'
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          cache_key: cache-reqs-${{ github.job }}
          repository: ${{ github.repository }}
          branch: ${{ github.ref }}
        run: |
          gh extension install actions/gh-actions-cache
          gh actions-cache delete "${cache_key}" -R "${repository}" -B "${branch}" --confirm
      - name: Cache requirements
        uses: actions/cache/save@v4
        with:
          path: |
            ~/yt-dlp-build-venv
          key: cache-reqs-${{ github.job }}

  macos_legacy:
    needs: process
    if: inputs.macos_legacy
    runs-on: macos-12
    steps:
      - uses: actions/checkout@v4
      - name: Install Python
        # We need the official Python, because the GA ones only support newer macOS versions
        env:
          PYTHON_VERSION: 3.10.5
          MACOSX_DEPLOYMENT_TARGET: 10.9  # Used up by the Python build tools
        run: |
          # Hack to get the latest patch version. Uncomment if needed
          #brew install python@3.10
          #export PYTHON_VERSION=$( $(brew --prefix)/opt/python@3.10/bin/python3 --version | cut -d ' ' -f 2 )
          curl "https://www.python.org/ftp/python/${PYTHON_VERSION}/python-${PYTHON_VERSION}-macos11.pkg" -o "python.pkg"
          sudo installer -pkg python.pkg -target /
          python3 --version
      - name: Install Requirements
        run: |
          brew install coreutils
          python3 devscripts/install_deps.py --user -o --include build
          python3 devscripts/install_deps.py --user --include pyinstaller
      - name: Prepare
        run: |
          python3 devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
          python3 devscripts/make_lazy_extractors.py
      - name: Build
        run: |
          python3 -m bundle.pyinstaller
          mv dist/yt-dlp_macos dist/yt-dlp_macos_legacy
      - name: Verify --update-to
        if: vars.UPDATE_TO_VERIFICATION
        run: |
          chmod +x ./dist/yt-dlp_macos_legacy
          cp ./dist/yt-dlp_macos_legacy ./dist/yt-dlp_macos_legacy_downgraded
          version="$(./dist/yt-dlp_macos_legacy --version)"
          ./dist/yt-dlp_macos_legacy_downgraded -v --update-to yt-dlp/yt-dlp@2023.03.04
          downgraded_version="$(./dist/yt-dlp_macos_legacy_downgraded --version)"
          [[ "$version" != "$downgraded_version" ]]
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: build-bin-${{ github.job }}
          path: |
            dist/yt-dlp_macos_legacy
          compression-level: 0

  windows:
    needs: process
    if: inputs.windows
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:  # 3.8 is used for Win7 support
          python-version: "3.8"
      - name: Install Requirements
        run: |  # Custom pyinstaller built with https://github.com/yt-dlp/pyinstaller-builds
          python devscripts/install_deps.py -o --include build
          python devscripts/install_deps.py --include curl-cffi
          python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/x86_64/pyinstaller-6.10.0-py3-none-any.whl"
      - name: Prepare
        run: |
          python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
          python devscripts/make_lazy_extractors.py
      - name: Build
        run: |
          python -m bundle.pyinstaller
          python -m bundle.pyinstaller --onedir
          Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_real.exe
          Compress-Archive -Path ./dist/yt-dlp/* -DestinationPath ./dist/yt-dlp_win.zip
      - name: Install Requirements (py2exe)
        run: |
          python devscripts/install_deps.py --include py2exe
      - name: Build (py2exe)
        run: |
          python -m bundle.py2exe
          Move-Item ./dist/yt-dlp.exe ./dist/yt-dlp_min.exe
          Move-Item ./dist/yt-dlp_real.exe ./dist/yt-dlp.exe
      - name: Verify --update-to
        if: vars.UPDATE_TO_VERIFICATION
        run: |
          foreach ($name in @("yt-dlp","yt-dlp_min")) {
            Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe"
            $version = & "./dist/${name}.exe" --version
            & "./dist/${name}_downgraded.exe" -v --update-to yt-dlp/yt-dlp@2023.03.04
            $downgraded_version = & "./dist/${name}_downgraded.exe" --version
            if ($version -eq $downgraded_version) {
              exit 1
            }
          }
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: build-bin-${{ github.job }}
          path: |
            dist/yt-dlp.exe
            dist/yt-dlp_min.exe
            dist/yt-dlp_win.zip
          compression-level: 0

  windows32:
    needs: process
    if: inputs.windows32
    runs-on: windows-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.8"
          architecture: "x86"
      - name: Install Requirements
        run: |
          python devscripts/install_deps.py -o --include build
          python devscripts/install_deps.py
          python -m pip install -U "https://yt-dlp.github.io/Pyinstaller-Builds/i686/pyinstaller-6.10.0-py3-none-any.whl"
      - name: Prepare
        run: |
          python devscripts/update-version.py -c "${{ inputs.channel }}" -r "${{ needs.process.outputs.origin }}" "${{ inputs.version }}"
          python devscripts/make_lazy_extractors.py
      - name: Build
        run: |
          python -m bundle.pyinstaller
      - name: Verify --update-to
        if: vars.UPDATE_TO_VERIFICATION
        run: |
          foreach ($name in @("yt-dlp_x86")) {
            Copy-Item "./dist/${name}.exe" "./dist/${name}_downgraded.exe"
            $version = & "./dist/${name}.exe" --version
            & "./dist/${name}_downgraded.exe" -v --update-to yt-dlp/yt-dlp@2023.03.04
            $downgraded_version = & "./dist/${name}_downgraded.exe" --version
            if ($version -eq $downgraded_version) {
              exit 1
            }
          }
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: build-bin-${{ github.job }}
          path: |
            dist/yt-dlp_x86.exe
          compression-level: 0

  meta_files:
    if: always() && !cancelled()
    needs:
      - process
      - unix
      - linux_static
      - linux_arm
      - macos
      - macos_legacy
      - windows
      - windows32
    runs-on: ubuntu-latest
    steps:
      - uses: actions/download-artifact@v4
        with:
          path: artifact
          pattern: build-bin-*
          merge-multiple: true
      - name: Make SHA2-SUMS files
        run: |
          cd ./artifact/
          # make sure SHA sums are also printed to stdout
          sha256sum -- * | tee ../SHA2-256SUMS
          sha512sum -- * | tee ../SHA2-512SUMS
          # also print as permanent annotations to the summary page
          while read -r shasum; do
            echo "::notice title=${shasum##* }::sha256: ${shasum% *}"
          done < ../SHA2-256SUMS
      - name: Make Update spec
        run: |
          cat >> _update_spec << EOF
          # This file is used for regulating self-update
          lock 2022.08.18.36 .+ Python 3\.6
          lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7
          lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
          lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6
          lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7
          lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
          lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7
          lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server)
          lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7
          lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server)
          EOF
      - name: Sign checksum files
        env:
          GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }}
        if: env.GPG_SIGNING_KEY != ''
        run: |
          gpg --batch --import <<< "${{ secrets.GPG_SIGNING_KEY }}"
          for signfile in ./SHA*SUMS; do
            gpg --batch --detach-sign "$signfile"
          done
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: build-${{ github.job }}
          path: |
            _update_spec
            SHA*SUMS*
          compression-level: 0
          overwrite: true
uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v2 with: languages: ${{ matrix.language }} # If you wish to specify custom queries, you can do so here or in a config file. # By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs # queries: security-extended,security-and-quality # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild uses: github/codeql-action/autobuild@v2 # ℹ️ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun # If the Autobuild fails above, remove it and uncomment the following three lines. # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. # - run: | # echo "Run, Build Application using script" # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis uses: github/codeql-action/analyze@v2 with: category: "/language:${{matrix.language}}" yt-dlp-2024.09.27/.github/workflows/core.yml000066400000000000000000000034131467563447100204240ustar00rootroot00000000000000name: Core Tests on: push: paths: - .github/** - devscripts/** - test/** - yt_dlp/**.py - '!yt_dlp/extractor/*.py' - yt_dlp/extractor/__init__.py - yt_dlp/extractor/common.py - yt_dlp/extractor/extractors.py pull_request: paths: - .github/** - devscripts/** - test/** - yt_dlp/**.py - '!yt_dlp/extractor/*.py' - yt_dlp/extractor/__init__.py - yt_dlp/extractor/common.py - yt_dlp/extractor/extractors.py permissions: contents: read concurrency: group: core-${{ github.event.pull_request.number || github.ref }} cancel-in-progress: ${{ github.event_name == 'pull_request' }} jobs: tests: name: Core Tests if: "!contains(github.event.head_commit.message, 'ci skip')" runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: os: [ubuntu-latest] # CPython 3.8 is in quick-test python-version: ['3.9', '3.10', '3.11', '3.12', pypy-3.8, pypy-3.10] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest python-version: '3.8' - os: windows-latest python-version: '3.12' - os: windows-latest python-version: pypy-3.9 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install test requirements run: python3 ./devscripts/install_deps.py --include test --include curl-cffi - name: Run tests timeout-minutes: 15 continue-on-error: False run: | python3 -m yt_dlp -v || true # Print debug head python3 ./devscripts/run_tests.py core yt-dlp-2024.09.27/.github/workflows/download.yml000066400000000000000000000026751467563447100213140ustar00rootroot00000000000000name: Download Tests on: [push, pull_request] permissions: contents: read jobs: quick: name: Quick Download Tests if: "contains(github.event.head_commit.message, 'ci run dl')" runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: 3.9 
- name: Install test requirements run: python3 ./devscripts/install_deps.py --include dev - name: Run tests continue-on-error: true run: python3 ./devscripts/run_tests.py download full: name: Full Download Tests if: "contains(github.event.head_commit.message, 'ci run dl all')" runs-on: ${{ matrix.os }} strategy: fail-fast: true matrix: os: [ubuntu-latest] python-version: ['3.10', '3.11', '3.12', pypy-3.8, pypy-3.10] include: # atleast one of each CPython/PyPy tests must be in windows - os: windows-latest python-version: '3.8' - os: windows-latest python-version: pypy-3.9 steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install test requirements run: python3 ./devscripts/install_deps.py --include dev - name: Run tests continue-on-error: true run: python3 ./devscripts/run_tests.py download yt-dlp-2024.09.27/.github/workflows/issue-lockdown.yml000066400000000000000000000007131467563447100224420ustar00rootroot00000000000000name: Issue Lockdown on: issues: types: [opened] permissions: issues: write jobs: lockdown: name: Issue Lockdown if: vars.ISSUE_LOCKDOWN runs-on: ubuntu-latest steps: - name: "Lock new issue" env: GH_TOKEN: ${{ github.token }} ISSUE_NUMBER: ${{ github.event.issue.number }} REPOSITORY: ${{ github.repository }} run: | gh issue lock "${ISSUE_NUMBER}" -R "${REPOSITORY}" yt-dlp-2024.09.27/.github/workflows/quick-test.yml000066400000000000000000000022301467563447100215610ustar00rootroot00000000000000name: Quick Test on: [push, pull_request] permissions: contents: read jobs: tests: name: Core Test if: "!contains(github.event.head_commit.message, 'ci skip all')" runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Set up Python 3.8 uses: actions/setup-python@v5 with: python-version: '3.8' - name: Install test requirements run: python3 ./devscripts/install_deps.py -o --include test - name: Run tests timeout-minutes: 15 run: | python3 -m yt_dlp -v || true python3 ./devscripts/run_tests.py core check: name: Code check if: "!contains(github.event.head_commit.message, 'ci skip all')" runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: '3.8' - name: Install dev dependencies run: python3 ./devscripts/install_deps.py -o --include static-analysis - name: Make lazy extractors run: python3 ./devscripts/make_lazy_extractors.py - name: Run ruff run: ruff check --output-format github . - name: Run autopep8 run: autopep8 --diff . 
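The `quick-test` workflow above runs its lint checks as plain commands, so a contributor can reproduce the same checks locally before pushing. A minimal sketch, using the commands from the workflow verbatim:

```shell
$ python3 ./devscripts/install_deps.py -o --include static-analysis
$ python3 ./devscripts/make_lazy_extractors.py
$ ruff check --output-format github .
$ autopep8 --diff .
```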
yt-dlp-2024.09.27/.github/workflows/release-master.yml000066400000000000000000000012131467563447100224010ustar00rootroot00000000000000name: Release (master) on: push: branches: - master paths: - "yt_dlp/**.py" - "!yt_dlp/version.py" - "bundle/*.py" - "pyproject.toml" - "Makefile" - ".github/workflows/build.yml" concurrency: group: release-master permissions: contents: read jobs: release: if: vars.BUILD_MASTER != '' uses: ./.github/workflows/release.yml with: prerelease: true source: master permissions: contents: write packages: write # For package cache actions: write # For cleaning up cache id-token: write # mandatory for trusted publishing secrets: inherit yt-dlp-2024.09.27/.github/workflows/release-nightly.yml000066400000000000000000000022101467563447100225620ustar00rootroot00000000000000name: Release (nightly) on: schedule: - cron: '23 23 * * *' permissions: contents: read jobs: check_nightly: if: vars.BUILD_NIGHTLY != '' runs-on: ubuntu-latest outputs: commit: ${{ steps.check_for_new_commits.outputs.commit }} steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - name: Check for new commits id: check_for_new_commits run: | relevant_files=( "yt_dlp/*.py" ':!yt_dlp/version.py' "bundle/*.py" "pyproject.toml" "Makefile" ".github/workflows/build.yml" ) echo "commit=$(git log --format=%H -1 --since="24 hours ago" -- "${relevant_files[@]}")" | tee "$GITHUB_OUTPUT" release: needs: [check_nightly] if: ${{ needs.check_nightly.outputs.commit }} uses: ./.github/workflows/release.yml with: prerelease: true source: nightly permissions: contents: write packages: write # For package cache actions: write # For cleaning up cache id-token: write # mandatory for trusted publishing secrets: inherit yt-dlp-2024.09.27/.github/workflows/release.yml000066400000000000000000000362351467563447100211240ustar00rootroot00000000000000name: Release on: workflow_call: inputs: prerelease: required: false default: true type: boolean source: required: false default: '' type: string target: required: false default: '' type: string version: required: false default: '' type: string workflow_dispatch: inputs: source: description: | SOURCE of this release's updates: channel, repo, tag, or channel/repo@tag (default: ) required: false default: '' type: string target: description: | TARGET to publish this release to: channel, tag, or channel@tag (default: if writable else [@source_tag]) required: false default: '' type: string version: description: | VERSION: yyyy.mm.dd[.rev] or rev (default: auto-generated) required: false default: '' type: string prerelease: description: Pre-release default: false type: boolean permissions: contents: read jobs: prepare: permissions: contents: write runs-on: ubuntu-latest outputs: channel: ${{ steps.setup_variables.outputs.channel }} version: ${{ steps.setup_variables.outputs.version }} target_repo: ${{ steps.setup_variables.outputs.target_repo }} target_repo_token: ${{ steps.setup_variables.outputs.target_repo_token }} target_tag: ${{ steps.setup_variables.outputs.target_tag }} pypi_project: ${{ steps.setup_variables.outputs.pypi_project }} pypi_suffix: ${{ steps.setup_variables.outputs.pypi_suffix }} head_sha: ${{ steps.get_target.outputs.head_sha }} steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - uses: actions/setup-python@v5 with: python-version: "3.10" - name: Process inputs id: process_inputs run: | cat << EOF ::group::Inputs prerelease=${{ inputs.prerelease }} source=${{ inputs.source }} target=${{ inputs.target }} version=${{ inputs.version }} ::endgroup:: EOF IFS='@' read 
-r source_repo source_tag <<<"${{ inputs.source }}" IFS='@' read -r target_repo target_tag <<<"${{ inputs.target }}" cat << EOF >> "$GITHUB_OUTPUT" source_repo=${source_repo} source_tag=${source_tag} target_repo=${target_repo} target_tag=${target_tag} EOF - name: Setup variables id: setup_variables env: source_repo: ${{ steps.process_inputs.outputs.source_repo }} source_tag: ${{ steps.process_inputs.outputs.source_tag }} target_repo: ${{ steps.process_inputs.outputs.target_repo }} target_tag: ${{ steps.process_inputs.outputs.target_tag }} run: | # unholy bash monstrosity (sincere apologies) fallback_token () { if ${{ !secrets.ARCHIVE_REPO_TOKEN }}; then echo "::error::Repository access secret ${target_repo_token^^} not found" exit 1 fi target_repo_token=ARCHIVE_REPO_TOKEN return 0 } source_is_channel=0 [[ "${source_repo}" == 'stable' ]] && source_repo='yt-dlp/yt-dlp' if [[ -z "${source_repo}" ]]; then source_repo='${{ github.repository }}' elif [[ '${{ vars[format('{0}_archive_repo', env.source_repo)] }}' ]]; then source_is_channel=1 source_channel='${{ vars[format('{0}_archive_repo', env.source_repo)] }}' elif [[ -z "${source_tag}" && "${source_repo}" != */* ]]; then source_tag="${source_repo}" source_repo='${{ github.repository }}' fi resolved_source="${source_repo}" if [[ "${source_tag}" ]]; then resolved_source="${resolved_source}@${source_tag}" elif [[ "${source_repo}" == 'yt-dlp/yt-dlp' ]]; then resolved_source='stable' fi revision="${{ (inputs.prerelease || !vars.PUSH_VERSION_COMMIT) && '$(date -u +"%H%M%S")' || '' }}" version="$( python devscripts/update-version.py \ -c "${resolved_source}" -r "${{ github.repository }}" ${{ inputs.version || '$revision' }} | \ grep -Po "version=\K\d+\.\d+\.\d+(\.\d+)?")" if [[ "${target_repo}" ]]; then if [[ -z "${target_tag}" ]]; then if [[ '${{ vars[format('{0}_archive_repo', env.target_repo)] }}' ]]; then target_tag="${source_tag:-${version}}" else target_tag="${target_repo}" target_repo='${{ github.repository }}' fi fi if [[ "${target_repo}" != '${{ github.repository}}' ]]; then target_repo='${{ vars[format('{0}_archive_repo', env.target_repo)] }}' target_repo_token='${{ env.target_repo }}_archive_repo_token' ${{ !!secrets[format('{0}_archive_repo_token', env.target_repo)] }} || fallback_token pypi_project='${{ vars[format('{0}_pypi_project', env.target_repo)] }}' pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.target_repo)] }}' fi else target_tag="${source_tag:-${version}}" if ((source_is_channel)); then target_repo="${source_channel}" target_repo_token='${{ env.source_repo }}_archive_repo_token' ${{ !!secrets[format('{0}_archive_repo_token', env.source_repo)] }} || fallback_token pypi_project='${{ vars[format('{0}_pypi_project', env.source_repo)] }}' pypi_suffix='${{ vars[format('{0}_pypi_suffix', env.source_repo)] }}' else target_repo='${{ github.repository }}' fi fi if [[ "${target_repo}" == '${{ github.repository }}' ]] && ${{ !inputs.prerelease }}; then pypi_project='${{ vars.PYPI_PROJECT }}' fi echo "::group::Output variables" cat << EOF | tee -a "$GITHUB_OUTPUT" channel=${resolved_source} version=${version} target_repo=${target_repo} target_repo_token=${target_repo_token} target_tag=${target_tag} pypi_project=${pypi_project} pypi_suffix=${pypi_suffix} EOF echo "::endgroup::" - name: Update documentation env: version: ${{ steps.setup_variables.outputs.version }} target_repo: ${{ steps.setup_variables.outputs.target_repo }} if: | !inputs.prerelease && env.target_repo == github.repository run: | python devscripts/update_changelog.py 
-vv make doc - name: Push to release id: push_release env: version: ${{ steps.setup_variables.outputs.version }} target_repo: ${{ steps.setup_variables.outputs.target_repo }} if: | !inputs.prerelease && env.target_repo == github.repository run: | git config --global user.name "github-actions[bot]" git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com" git add -u git commit -m "Release ${{ env.version }}" \ -m "Created by: ${{ github.event.sender.login }}" -m ":ci skip all" git push origin --force ${{ github.event.ref }}:release - name: Get target commitish id: get_target run: | echo "head_sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT" - name: Update master env: target_repo: ${{ steps.setup_variables.outputs.target_repo }} if: | vars.PUSH_VERSION_COMMIT != '' && !inputs.prerelease && env.target_repo == github.repository run: git push origin ${{ github.event.ref }} build: needs: prepare uses: ./.github/workflows/build.yml with: version: ${{ needs.prepare.outputs.version }} channel: ${{ needs.prepare.outputs.channel }} origin: ${{ needs.prepare.outputs.target_repo }} permissions: contents: read packages: write # For package cache actions: write # For cleaning up cache secrets: GPG_SIGNING_KEY: ${{ secrets.GPG_SIGNING_KEY }} publish_pypi: needs: [prepare, build] if: ${{ needs.prepare.outputs.pypi_project }} runs-on: ubuntu-latest permissions: id-token: write # mandatory for trusted publishing steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - uses: actions/setup-python@v5 with: python-version: "3.10" - name: Install Requirements run: | sudo apt -y install pandoc man python devscripts/install_deps.py -o --include build - name: Prepare env: version: ${{ needs.prepare.outputs.version }} suffix: ${{ needs.prepare.outputs.pypi_suffix }} channel: ${{ needs.prepare.outputs.channel }} target_repo: ${{ needs.prepare.outputs.target_repo }} pypi_project: ${{ needs.prepare.outputs.pypi_project }} run: | python devscripts/update-version.py -c "${{ env.channel }}" -r "${{ env.target_repo }}" -s "${{ env.suffix }}" "${{ env.version }}" python devscripts/update_changelog.py -vv python devscripts/make_lazy_extractors.py sed -i -E '0,/(name = ")[^"]+(")/s//\1${{ env.pypi_project }}\2/' pyproject.toml - name: Build run: | rm -rf dist/* make pypi-files printf '%s\n\n' \ 'Official repository: ' \ '**PS**: Some links in this document will not work since this is a copy of the README.md from Github' > ./README.md.new cat ./README.md >> ./README.md.new && mv -f ./README.md.new ./README.md python devscripts/set-variant.py pip -M "You installed yt-dlp with pip or using the wheel from PyPi; Use that to update" make clean-cache python -m build --no-isolation . 
- name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: verbose: true publish: needs: [prepare, build] permissions: contents: write runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - uses: actions/download-artifact@v4 with: path: artifact pattern: build-* merge-multiple: true - uses: actions/setup-python@v5 with: python-version: "3.10" - name: Generate release notes env: head_sha: ${{ needs.prepare.outputs.head_sha }} target_repo: ${{ needs.prepare.outputs.target_repo }} target_tag: ${{ needs.prepare.outputs.target_tag }} run: | printf '%s' \ '[![Installation](https://img.shields.io/badge/-Which%20file%20to%20download%3F-white.svg?style=for-the-badge)]' \ '(https://github.com/${{ github.repository }}#installation "Installation instructions") ' \ '[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)]' \ '(https://discord.gg/H5MNcFW63r "Discord") ' \ '[![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)]' \ '(https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators "Donate") ' \ '[![Documentation](https://img.shields.io/badge/-Docs-brightgreen.svg?style=for-the-badge&logo=GitBook&labelColor=555555)]' \ '(https://github.com/${{ github.repository }}' \ '${{ env.target_repo == github.repository && format('/tree/{0}', env.target_tag) || '' }}#readme "Documentation") ' \ ${{ env.target_repo == 'yt-dlp/yt-dlp' && '\ "[![Nightly](https://img.shields.io/badge/Nightly%20builds-purple.svg?style=for-the-badge)]" \ "(https://github.com/yt-dlp/yt-dlp-nightly-builds/releases/latest \"Nightly builds\") " \ "[![Master](https://img.shields.io/badge/Master%20builds-lightblue.svg?style=for-the-badge)]" \ "(https://github.com/yt-dlp/yt-dlp-master-builds/releases/latest \"Master builds\")"' || '' }} > ./RELEASE_NOTES printf '\n\n' >> ./RELEASE_NOTES cat >> ./RELEASE_NOTES << EOF #### A description of the various files is in the [README](https://github.com/${{ github.repository }}#release-files) --- $(python ./devscripts/make_changelog.py -vv --collapsible) EOF printf '%s\n\n' '**This is a pre-release build**' >> ./PRERELEASE_NOTES cat ./RELEASE_NOTES >> ./PRERELEASE_NOTES printf '%s\n\n' 'Generated from: https://github.com/${{ github.repository }}/commit/${{ env.head_sha }}' >> ./ARCHIVE_NOTES cat ./RELEASE_NOTES >> ./ARCHIVE_NOTES - name: Publish to archive repo env: GH_TOKEN: ${{ secrets[needs.prepare.outputs.target_repo_token] }} GH_REPO: ${{ needs.prepare.outputs.target_repo }} version: ${{ needs.prepare.outputs.version }} channel: ${{ needs.prepare.outputs.channel }} if: | inputs.prerelease && env.GH_TOKEN != '' && env.GH_REPO != '' && env.GH_REPO != github.repository run: | title="${{ startswith(env.GH_REPO, 'yt-dlp/') && 'yt-dlp ' || '' }}${{ env.channel }}" gh release create \ --notes-file ARCHIVE_NOTES \ --title "${title} ${{ env.version }}" \ ${{ env.version }} \ artifact/* - name: Prune old release env: GH_TOKEN: ${{ github.token }} version: ${{ needs.prepare.outputs.version }} target_repo: ${{ needs.prepare.outputs.target_repo }} target_tag: ${{ needs.prepare.outputs.target_tag }} if: | env.target_repo == github.repository && env.target_tag != env.version run: | gh release delete --yes --cleanup-tag "${{ env.target_tag }}" || true git tag --delete "${{ env.target_tag }}" || true sleep 5 # Enough time to cover deletion race condition - name: Publish release env: GH_TOKEN: ${{ github.token }} 
version: ${{ needs.prepare.outputs.version }} target_repo: ${{ needs.prepare.outputs.target_repo }} target_tag: ${{ needs.prepare.outputs.target_tag }} head_sha: ${{ needs.prepare.outputs.head_sha }} if: | env.target_repo == github.repository run: | title="${{ github.repository == 'yt-dlp/yt-dlp' && 'yt-dlp ' || '' }}" title+="${{ env.target_tag != env.version && format('{0} ', env.target_tag) || '' }}" gh release create \ --notes-file ${{ inputs.prerelease && 'PRERELEASE_NOTES' || 'RELEASE_NOTES' }} \ --target ${{ env.head_sha }} \ --title "${title}${{ env.version }}" \ ${{ inputs.prerelease && '--prerelease' || '' }} \ ${{ env.target_tag }} \ artifact/* yt-dlp-2024.09.27/.github/workflows/sanitize-comment.yml000066400000000000000000000005161467563447100227630ustar00rootroot00000000000000name: Sanitize comment on: issue_comment: types: [created, edited] permissions: issues: write jobs: sanitize-comment: name: Sanitize comment if: vars.SANITIZE_COMMENT && !github.event.issue.pull_request runs-on: ubuntu-latest steps: - name: Sanitize comment uses: yt-dlp/sanitize-comment@v1 yt-dlp-2024.09.27/.gitignore000066400000000000000000000021431467563447100153430ustar00rootroot00000000000000# Config *.conf cookies *cookies.txt .netrc # Downloaded *.annotations.xml *.aria2 *.description *.dump *.frag *.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl .cache/ *.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.lrc *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 *.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp # Allow config/media files in testdata !test/** # Python *.pyc *.pyo .*_cache wine-py2exe/ py2exe.log build/ dist/ zip/ tmp/ venv/ .venv/ completions/ # Misc *~ *.DS_Store *.kate-swp MANIFEST test/local_parameters.json .coverage cover/ secrets/ updates_key.pem *.egg-info .tox *.class *.isorted *.stackdump # Generated AUTHORS README.txt .mailmap *.1 *.bash-completion *.fish *.tar.gz *.zsh *.spec test/testdata/sigs/player-*.js # Binary /youtube-dl /youtube-dlc /yt-dlp yt-dlp.zip *.exe # Text Editor / IDE .idea *.iml .vscode *.sublime-* *.code-workspace *.swp # Lazy extractors */extractor/lazy_extractors.py # Plugins ytdlp_plugins/ yt-dlp-plugins yt-dlp-2024.09.27/.pre-commit-config.yaml000066400000000000000000000004441467563447100176360ustar00rootroot00000000000000repos: - repo: local hooks: - id: linter name: Apply linter fixes entry: ruff check --fix . language: system types: [python] require_serial: true - id: format name: Apply formatting fixes entry: autopep8 --in-place . 
language: system types: [python] yt-dlp-2024.09.27/.pre-commit-hatch.yaml000066400000000000000000000002341467563447100174550ustar00rootroot00000000000000repos: - repo: local hooks: - id: fix name: Apply code fixes entry: hatch fmt language: system types: [python] require_serial: true yt-dlp-2024.09.27/CONTRIBUTING.md000066400000000000000000001107751467563447100156170ustar00rootroot00000000000000# CONTRIBUTING TO YT-DLP - [OPENING AN ISSUE](#opening-an-issue) - [Is the description of the issue itself sufficient?](#is-the-description-of-the-issue-itself-sufficient) - [Are you using the latest version?](#are-you-using-the-latest-version) - [Is the issue already documented?](#is-the-issue-already-documented) - [Why are existing options not enough?](#why-are-existing-options-not-enough) - [Have you read and understood the changes, between youtube-dl and yt-dlp](#have-you-read-and-understood-the-changes-between-youtube-dl-and-yt-dlp) - [Is there enough context in your bug report?](#is-there-enough-context-in-your-bug-report) - [Does the issue involve one problem, and one problem only?](#does-the-issue-involve-one-problem-and-one-problem-only) - [Is anyone going to need the feature?](#is-anyone-going-to-need-the-feature) - [Is your question about yt-dlp?](#is-your-question-about-yt-dlp) - [Are you willing to share account details if needed?](#are-you-willing-to-share-account-details-if-needed) - [Is the website primarily used for piracy](#is-the-website-primarily-used-for-piracy) - [DEVELOPER INSTRUCTIONS](#developer-instructions) - [Adding new feature or making overarching changes](#adding-new-feature-or-making-overarching-changes) - [Adding support for a new site](#adding-support-for-a-new-site) - [yt-dlp coding conventions](#yt-dlp-coding-conventions) - [Mandatory and optional metafields](#mandatory-and-optional-metafields) - [Provide fallbacks](#provide-fallbacks) - [Regular expressions](#regular-expressions) - [Long lines policy](#long-lines-policy) - [Quotes](#quotes) - [Inline values](#inline-values) - [Collapse fallbacks](#collapse-fallbacks) - [Trailing parentheses](#trailing-parentheses) - [Use convenience conversion and parsing functions](#use-convenience-conversion-and-parsing-functions) - [My pull request is labeled pending-fixes](#my-pull-request-is-labeled-pending-fixes) - [EMBEDDING YT-DLP](README.md#embedding-yt-dlp) # OPENING AN ISSUE Bugs and suggestions should be reported at: [yt-dlp/yt-dlp/issues](https://github.com/yt-dlp/yt-dlp/issues). Unless you were prompted to or there is another pertinent reason (e.g. GitHub fails to accept the bug report), please do not send bug reports via personal email. For discussions, join us in our [discord server](https://discord.gg/H5MNcFW63r). **Please include the full output of yt-dlp when run with `-vU`**, i.e. **add** `-vU` flag to **your command line**, copy the **whole** output and post it in the issue body wrapped in \`\`\` for better formatting. It should look similar to this: ``` $ yt-dlp -vU [debug] Command-line config: ['-v', 'demo.com'] [debug] Encodings: locale UTF-8, fs utf-8, out utf-8, pref UTF-8 [debug] yt-dlp version 2021.09.25 (zip) [debug] Python version 3.8.10 (CPython 64bit) - Linux-5.4.0-74-generic-x86_64-with-glibc2.29 [debug] exe versions: ffmpeg 4.2.4, ffprobe 4.2.4 [debug] Proxy map: {} Current Build Hash 25cc412d1d3c0725a1f2f5b7e4682f6fb40e6d15f7024e96f7afd572e9919535 yt-dlp is up to date (2021.09.25) ... 
```

**Do not post screenshots of verbose logs; only plain text is acceptable.**

The output (including the first lines) contains important debugging information. Issues without the full output are often not reproducible and therefore will be closed as `incomplete`.

The templates provided for the issues should be completed and **not removed**; this helps aid the resolution of the issue.

Please re-read your issue once again to avoid a couple of common mistakes (you can and should use this as a checklist):

### Is the description of the issue itself sufficient?

We often get issue reports that we cannot really decipher. While in most cases we eventually get the required information after asking back multiple times, this poses an unnecessary drain on our resources.

So please elaborate on what feature you are requesting, or what bug you want to be fixed. Make sure that it's obvious

- What the problem is
- How it could be fixed
- What your proposed solution would look like

If your report is shorter than two lines, it is almost certainly missing some of these, which makes it hard for us to respond to it. We're often too polite to close the issue outright, but the missing info makes misinterpretation likely. We often get frustrated by these issues, since the only possible way for us to move forward on them is to ask for clarification over and over.

For bug reports, this means that your report should contain the **complete** output of yt-dlp when called with the `-vU` flag. The error message you get for (most) bugs even says so, but you would not believe how many of our bug reports do not contain this information. If the error is `ERROR: Unable to extract ...` and you cannot reproduce it from multiple countries, add `--write-pages` and upload the `.dump` files you get [somewhere](https://gist.github.com) (see the example sketch further below).

**Site support requests must contain an example URL**. An example URL is a URL you might want to download, like `https://www.youtube.com/watch?v=BaW_jenozKc`. There should be an obvious video present. Except under very special circumstances, the main page of a video service (e.g. `https://www.youtube.com/`) is *not* an example URL.

### Are you using the latest version?

Before reporting any issue, type `yt-dlp -U`. This should report that you're up-to-date. This goes for feature requests as well.

### Is the issue already documented?

Make sure that someone has not already opened the issue you're trying to open. Search at the top of the window or browse the [GitHub Issues](https://github.com/yt-dlp/yt-dlp/search?type=Issues) of this repository. If there is an issue, subscribe to it to be notified when there is any progress. Unless you have something useful to add to the conversation, please refrain from commenting.

Additionally, it is also helpful to see if the issue has already been documented in the [youtube-dl issue tracker](https://github.com/ytdl-org/youtube-dl/issues). If similar issues have already been reported in youtube-dl (but not in our issue tracker), links to them can be included in your issue report here.

### Why are existing options not enough?

Before requesting a new feature, please have a quick peek at [the list of supported options](README.md#usage-and-options). Many feature requests are for features that actually exist already! Please, absolutely do show off your work in the issue report and detail how the existing similar options do *not* solve your problem.
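As an example of how to collect the verbose output and `--write-pages` dumps requested above, a minimal sketch (the URL is only a placeholder for the link that fails for you):

```shell
# Produce the complete verbose output that must accompany a bug report
$ yt-dlp -vU 'https://www.youtube.com/watch?v=BaW_jenozKc' 2>&1 | tee verbose.log

# For hard-to-reproduce extraction errors, also dump the fetched pages
$ yt-dlp -vU --write-pages 'https://www.youtube.com/watch?v=BaW_jenozKc'
# then upload the resulting *.dump files, e.g. to https://gist.github.com
```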
### Have you read and understood the changes between youtube-dl and yt-dlp

There are many changes between youtube-dl and yt-dlp [(changes to default behavior)](README.md#differences-in-default-behavior), and some of the available options behave differently in yt-dlp or have been removed altogether [(list of changes to options)](README.md#deprecated-options). Make sure you have read and understood the differences in the options and how this may impact your downloads before opening an issue.

### Is there enough context in your bug report?

People want to solve problems, and often think they do us a favor by breaking down their larger problems (e.g. wanting to skip already downloaded files) into a specific request (e.g. requesting us to look whether the file exists before downloading the info page). However, what often happens is that they break down the problem into two steps: one simple, and one impossible (or extremely complicated).

We are then presented with a very complicated request when the original problem could be solved far more easily, e.g. by recording the downloaded video IDs in a separate file.

To avoid this, you must include the greater context where it is non-obvious. In particular, every feature request that does not consist of adding support for a new site should contain a use case scenario that explains in what situation the missing feature would be useful.

### Does the issue involve one problem, and one problem only?

Some of our users seem to think there is a limit of issues they can or should open. There is no limit of issues they can or should open. While it may seem appealing to be able to dump all your issues into one ticket, that means that someone who solves one of your issues cannot mark the issue as closed. Typically, reporting a bunch of issues leads to the ticket lingering since nobody wants to attack that behemoth, until someone mercifully splits the issue into multiple ones.

In particular, every site support request issue should only pertain to services at one site (generally under a common domain, but always using the same backend technology). Do not request support for vimeo user videos, White house podcasts, and Google Plus pages in the same issue. Also, make sure that you don't post bug reports alongside feature requests. As a rule of thumb, a feature request does not include outputs of yt-dlp that are not immediately related to the feature at hand. Do not post reports of a network error alongside the request for a new video service.

### Is anyone going to need the feature?

Only post features that you (or an incapacitated friend you can personally talk to) require. Do not post features because they seem like a good idea. If they are really useful, they will be requested by someone who requires them.

### Is your question about yt-dlp?

Some bug reports are completely unrelated to yt-dlp and relate to a different, or even the reporter's own, application. Please make sure that you are actually using yt-dlp. If you are using a UI for yt-dlp, report the bug to the maintainer of the actual application providing the UI. In general, if you are unable to provide the verbose log, you should not be opening the issue here.

If the issue is with `youtube-dl` (the upstream project that yt-dlp was forked from) and not with yt-dlp, the issue should be raised in the youtube-dl project.

### Are you willing to share account details if needed?

The maintainers and potential contributors of the project often do not have an account for the website you are asking support for.
So any developer interested in solving your issue may ask you for account details. It is at your personal discretion whether you are willing to share the account in order for the developer to try and solve your issue. However, if you are unwilling or unable to provide details, they obviously cannot work on the issue and it cannot be solved unless some developer who both has an account and is willing/able to contribute decides to solve it.

By sharing an account with anyone, you agree to bear all risks associated with it. The maintainers and yt-dlp can't be held responsible for any misuse of the credentials.

While these steps won't necessarily ensure that no misuse of the account takes place, these are still some good practices to follow.

- Look for people with a `Member` (maintainers of the project) or `Contributor` (people who have previously contributed code) tag on their messages.
- Change the password before sharing the account to something random (use [this](https://passwordsgenerator.net/) if you don't have a random password generator).
- Change the password after receiving the account back.

### Is the website primarily used for piracy?

We follow [youtube-dl's policy](https://github.com/ytdl-org/youtube-dl#can-you-add-support-for-this-anime-video-site-or-site-which-shows-current-movies-for-free) of not supporting services that are primarily used for infringing copyright. Additionally, it has been decided not to support porn sites that specialize in fakes. We also cannot support any service that serves only [DRM protected content](https://en.wikipedia.org/wiki/Digital_rights_management).

# DEVELOPER INSTRUCTIONS

Most users do not need to build yt-dlp and can [download the builds](https://github.com/yt-dlp/yt-dlp/releases), get them via [the other installation methods](README.md#installation) or directly run it using `python -m yt_dlp`.

`yt-dlp` uses [`hatch`](https://hatch.pypa.io) as a project management tool. You can easily install it using [`pipx`](https://pipx.pypa.io) via `pipx install hatch`, or else via `pip` or your package manager of choice. Make sure you are using at least version `1.10.0`, otherwise some functionality might not work as expected.

If you plan on contributing to `yt-dlp`, best practice is to start by running the following command:

```shell
$ hatch run setup
```

The above command will install a `pre-commit` hook so that required checks/fixes (linting, formatting) will run automatically before each commit. If any code needs to be linted or formatted, then the commit will be blocked and the necessary changes will be made; you should review all edits and re-commit the fixed version.

After this you can use `hatch shell` to enable a virtual environment that has `yt-dlp` and its development dependencies installed.

In addition, the following script commands can be used to run simple tasks such as linting or testing (without having to run `hatch shell` first):

* `hatch fmt`: Automatically fix linter violations and apply required code formatting changes
    * See `hatch fmt --help` for more info
* `hatch test`: Run extractor or core tests
    * See `hatch test --help` for more info

See item 6 of [new extractor tutorial](#adding-support-for-a-new-site) for how to run extractor specific test cases, as illustrated below.
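For example, a typical contributor session with these commands might look like the following (a minimal sketch, assuming `hatch` is already installed; `YourExtractor` is a placeholder test name):

```shell
$ hatch run setup            # one-time setup: installs the pre-commit hook
$ hatch shell                # enter a venv with yt-dlp and its dev dependencies
$ hatch fmt                  # auto-fix linter violations and formatting
$ hatch test YourExtractor   # run the test cases for a single extractor
```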
While it is strongly recommended to use `hatch` for yt-dlp development, if you are unable to do so, alternatively you can manually create a virtual environment and use the following commands: ```shell # To only install development dependencies: $ python -m devscripts.install_deps --include dev # Or, for an editable install plus dev dependencies: $ python -m pip install -e ".[default,dev]" # To setup the pre-commit hook: $ pre-commit install # To be used in place of `hatch test`: $ python -m devscripts.run_tests # To be used in place of `hatch fmt`: $ ruff check --fix . $ autopep8 --in-place . # To only check code instead of applying fixes: $ ruff check . $ autopep8 --diff . ``` If you want to create a build of yt-dlp yourself, you can follow the instructions [here](README.md#compile). ## Adding new feature or making overarching changes Before you start writing code for implementing a new feature, open an issue explaining your feature request and at least one use case. This allows the maintainers to decide whether such a feature is desired for the project in the first place, and will provide an avenue to discuss some implementation details. If you open a pull request for a new feature without discussing with us first, do not be surprised when we ask for large changes to the code, or even reject it outright. The same applies for changes to the documentation, code style, or overarching changes to the architecture ## Adding support for a new site If you want to add support for a new site, first of all **make sure** this site is **not dedicated to [copyright infringement](#is-the-website-primarily-used-for-piracy)**. yt-dlp does **not support** such sites thus pull requests adding support for them **will be rejected**. After you have ensured this site is distributing its content legally, you can follow this quick list (assuming your service is called `yourextractor`): 1. [Fork this repository](https://github.com/yt-dlp/yt-dlp/fork) 1. Check out the source code with: ```shell $ git clone git@github.com:YOUR_GITHUB_USERNAME/yt-dlp.git ``` 1. Start a new git branch with ```shell $ cd yt-dlp $ git checkout -b yourextractor ``` 1. Start with this simple template and save it to `yt_dlp/extractor/yourextractor.py`: ```python from .common import InfoExtractor class YourExtractorIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?yourextractor\.com/watch/(?P[0-9]+)' _TESTS = [{ 'url': 'https://yourextractor.com/watch/42', 'md5': 'TODO: md5 sum of the first 10241 bytes of the video file (use --test)', 'info_dict': { # For videos, only the 'id' and 'ext' fields are required to RUN the test: 'id': '42', 'ext': 'mp4', # Then if the test run fails, it will output the missing/incorrect fields. # Properties can be added as: # * A value, e.g. # 'title': 'Video title goes here', # * MD5 checksum; start the string with 'md5:', e.g. # 'description': 'md5:098f6bcd4621d373cade4e832627b4f6', # * A regular expression; start the string with 're:', e.g. # 'thumbnail': r're:^https?://.*\.jpg$', # * A count of elements in a list; start the string with 'count:', e.g. # 'tags': 'count:10', # * Any Python type, e.g. # 'view_count': int, } }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) # TODO more code goes here, for example ... title = self._html_search_regex(r'
<h1>(.+?)</h1>
', webpage, 'title')

           return {
               'id': video_id,
               'title': title,
               'description': self._og_search_description(webpage),
               'uploader': self._search_regex(r'<div[^>]+id="uploader"[^>]*>([^<]+)<', webpage, 'uploader', fatal=False),
               # TODO more properties (see yt_dlp/extractor/common.py)
           }
   ```
1. Add an import in [`yt_dlp/extractor/_extractors.py`](yt_dlp/extractor/_extractors.py). Note that the class name must end with `IE`. Also note that when adding a parenthesized import group, the last import in the group must have a trailing comma in order for this formatting to be respected by our code formatter.
1. Run `hatch test YourExtractor`. This *may fail* at first, but you can continually re-run it until you're done. Upon failure, it will output the missing fields and/or correct values which you can copy. If you decide to add more than one test, the tests will then be named `YourExtractor`, `YourExtractor_1`, `YourExtractor_2`, etc. Note that tests with an `only_matching` key in the test's dict are not included in the count. You can also run all the tests in one go with `YourExtractor_all`.
1. Make sure you have at least one test for your extractor. Even if all videos covered by the extractor are expected to be inaccessible for automated testing, tests should still be added with a `skip` parameter indicating why the particular test is disabled from running.
1. Have a look at [`yt_dlp/extractor/common.py`](yt_dlp/extractor/common.py) for possible helper methods and a [detailed description of what your extractor should and may return](yt_dlp/extractor/common.py#L119-L440). Add tests and code for as many as you want.
1. Make sure your code follows [yt-dlp coding conventions](#yt-dlp-coding-conventions), passes [ruff](https://docs.astral.sh/ruff/tutorial/#getting-started) code checks and is properly formatted:
   ```shell
   $ hatch fmt --check
   ```
   You can use `hatch fmt` to automatically fix problems. Rules that the linter/formatter enforces should not be disabled with `# noqa` unless a maintainer requests it. The only exception allowed is for old/printf-style string formatting in GraphQL query templates (use `# noqa: UP031`).
1. Make sure your code works under all [Python](https://www.python.org/) versions supported by yt-dlp, namely CPython and PyPy for Python 3.8 and above. Backward compatibility is not required for even older versions of Python.
1. When the tests pass, [add](https://git-scm.com/docs/git-add) the new files, [commit](https://git-scm.com/docs/git-commit) them and [push](https://git-scm.com/docs/git-push) the result, like this:
   ```shell
   $ git add yt_dlp/extractor/_extractors.py
   $ git add yt_dlp/extractor/yourextractor.py
   $ git commit -m '[yourextractor] Add extractor'
   $ git push origin yourextractor
   ```
1. Finally, [create a pull request](https://help.github.com/articles/creating-a-pull-request). We'll then review and merge it.

In any case, thank you very much for your contributions!

**Tip:** To test extractors that require login information, create a file `test/local_parameters.json` and add `"usenetrc": true` or your `username`&`password` or `cookiefile`/`cookiesfrombrowser` in it:
```json
{
    "username": "your user name",
    "password": "your password"
}
```

## yt-dlp coding conventions

This section introduces guidelines for writing idiomatic, robust and future-proof extractor code.

Extractors are very fragile by nature since they depend on the layout of the source data provided by 3rd party media hosters out of your control and this layout tends to change.
As an extractor implementer your task is not only to write code that will extract media links and metadata correctly but also to minimize dependency on the source's layout and even to make the code foresee potential future changes and be ready for that. This is important because it will allow the extractor not to break on minor layout changes, thus keeping old yt-dlp versions working. Even though this breakage issue may be easily fixed by a new version of yt-dlp, this could take some time, during which the extractor will remain broken.

### Mandatory and optional metafields

For extraction to work yt-dlp relies on metadata your extractor extracts and provides to yt-dlp expressed by an [information dictionary](yt_dlp/extractor/common.py#L119-L440) or simply *info dict*. Only the following meta fields in the *info dict* are considered mandatory for a successful extraction process by yt-dlp:

- `id` (media identifier)
- `title` (media title)
- `url` (media download URL) or `formats`

The aforementioned metafields are the critical data that the extraction does not make any sense without, and if any of them fail to be extracted, then the extractor is considered completely broken. While all extractors must return a `title`, they must also allow its extraction to be non-fatal.

For pornographic sites, an appropriate `age_limit` must also be returned.

The extractor is allowed to return the info dict without url or formats in some special cases if it allows the user to extract useful information with `--ignore-no-formats-error` - e.g. when the video is a live stream that has not started yet.

[Any field](yt_dlp/extractor/common.py#L219-L426) apart from the aforementioned ones is considered **optional**. That means that extraction should be **tolerant** to situations when sources for these fields can potentially be unavailable (even if they are always available at the moment) and **future-proof** in order not to break the extraction of general purpose mandatory fields.

#### Example

Say you have some source dictionary `meta` that you've fetched as JSON with an HTTP request and it has a key `summary`:

```python
meta = self._download_json(url, video_id)
```

Assume at this point `meta`'s layout is:

```python
{
    "summary": "some fancy summary text",
    "user": {
        "name": "uploader name"
    },
    ...
}
```

Assume you want to extract `summary` and put it into the resulting info dict as `description`. Since `description` is an optional meta field you should be ready that this key may be missing from the `meta` dict, so you should extract it like:

```python
description = meta.get('summary')  # correct
```

and not like:

```python
description = meta['summary']  # incorrect
```

The latter will break the extraction process with `KeyError` if `summary` disappears from `meta` at some later time, but with the former approach extraction will just go ahead with `description` set to `None`, which is perfectly fine (remember `None` is equivalent to the absence of data).

If the data is nested, do not use `.get` chains, but instead make use of `traverse_obj`.
Considering the above `meta` again, assume you want to extract `["user"]["name"]` and put it in the resulting info dict as `uploader`

```python
uploader = traverse_obj(meta, ('user', 'name'))  # correct
```

and not like:

```python
uploader = meta['user']['name']  # incorrect
```

or

```python
uploader = meta.get('user', {}).get('name')  # incorrect
```

or

```python
uploader = try_get(meta, lambda x: x['user']['name'])  # old utility
```

Similarly, you should pass `fatal=False` when extracting optional data from a webpage with `_search_regex`, `_html_search_regex` or similar methods, for instance:

```python
description = self._search_regex(
    r'<span[^>]+id="title"[^>]*>([^<]+)<',
    webpage, 'description', fatal=False)
```

With `fatal` set to `False`, if `_search_regex` fails to extract `description`, it will emit a warning and continue extraction.

You can also pass `default=<some fallback value>`, for example:

```python
description = self._search_regex(
    r'<span[^>]+id="title"[^>]*>([^<]+)<',
    webpage, 'description', default=None)
```

On failure this code will silently continue the extraction with `description` set to `None`. That is useful for metafields that may or may not be present.

Another thing to remember is not to try to iterate over `None`.

Say you extracted a list of thumbnails into `thumbnail_data` and want to iterate over them

```python
thumbnail_data = data.get('thumbnails') or []
thumbnails = [{
    'url': item['url'],
    'height': item.get('h'),
} for item in thumbnail_data if item.get('url')]  # correct
```

and not like:

```python
thumbnail_data = data.get('thumbnails')
thumbnails = [{
    'url': item['url'],
    'height': item.get('h'),
} for item in thumbnail_data]  # incorrect
```

In this case, `thumbnail_data` will be `None` if the field was not found and this will cause the loop `for item in thumbnail_data` to raise a fatal error. Using `or []` avoids this error and results in setting an empty list in `thumbnails` instead.

Alternatively, this can be further simplified by using `traverse_obj`

```python
thumbnails = [{
    'url': item['url'],
    'height': item.get('h'),
} for item in traverse_obj(data, ('thumbnails', lambda _, v: v['url']))]
```

or, even better,

```python
thumbnails = traverse_obj(data, ('thumbnails', ..., {'url': 'url', 'height': 'h'}))
```

### Provide fallbacks

When extracting metadata try to do so from multiple sources. For example if `title` is present in several places, try extracting from at least some of them. This makes it more future-proof in case some of the sources become unavailable.

#### Example

Say `meta` from the previous example has a `title` and you are about to extract it like:

```python
title = meta.get('title')
```

If `title` disappears from `meta` in the future due to some changes on the hoster's side the title extraction would fail.

Assume that you have some other source you can extract `title` from, for example `og:title` HTML meta of a `webpage`. In this case you can provide a fallback like:

```python
title = meta.get('title') or self._og_search_title(webpage)
```

This code will try to extract from `meta` first and if it fails it will try extracting `og:title` from a `webpage`, making the extractor more robust.

### Regular expressions

#### Don't capture groups you don't use

A capturing group must be an indication that it's used somewhere in the code. Any group that is not used must be non-capturing.

##### Example

Don't capture the id attribute name here since you can't use it for anything anyway.
Correct:

```python
r'(?:id|ID)=(?P<id>\d+)'
```

Incorrect:

```python
r'(id|ID)=(?P<id>\d+)'
```

#### Make regular expressions relaxed and flexible

When using regular expressions try to write them fuzzy, relaxed and flexible, skipping insignificant parts that are more likely to change, allowing both single and double quotes for quoted values and so on.

##### Example

Say you need to extract `title` from the following HTML code:

```html
<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">some fancy title</span>
```

The code for that task should look similar to:

```python
title = self._search_regex(  # correct
    r'<span[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
```

which tolerates potential changes in the `style` attribute's value. Or even better:

```python
title = self._search_regex(  # correct
    r'<span[^>]+class=(["\'])title\1[^>]*>(?P<title>[^<]+)',
    webpage, 'title', group='title')
```

which also handles single quotes in addition to double quotes.

The code definitely should not look like:

```python
title = self._search_regex(  # incorrect
    r'<span style="position: absolute; left: 910px; width: 90px; float: right; z-index: 9999;" class="title">(.*?)</span>',
    webpage, 'title', group='title')
```

or even

```python
title = self._search_regex(  # incorrect
    r'<span style=".*?" class="title">(.*?)</span>',
    webpage, 'title', group='title')
```

Here the presence or absence of other attributes including `style` is irrelevant for the data we need, and so the regex must not depend on it.

#### Keep the regular expressions as simple as possible, but no simpler

Since many extractors deal with unstructured data provided by websites, we will often need to use very complex regular expressions. You should try to use the *simplest* regex that can accomplish what you want. In other words, each part of the regex must have a reason for existing. If you can take out a symbol and the functionality does not change, the symbol should not be there.

##### Example

Correct:

```python
_VALID_URL = r'https?://(?:www\.)?website\.com/(?:[^/]+/){3,4}(?P<display_id>[^/]+)_(?P<id>\d+)'
```

Incorrect:

```python
_VALID_URL = r'https?:\/\/(?:www\.)?website\.com\/[^\/]+/[^\/]+/[^\/]+(?:\/[^\/]+)?\/(?P<display_id>[^\/]+)_(?P<id>\d+)'
```

#### Do not misuse `.` and use the correct quantifiers (`+*?`)

Avoid creating regexes that over-match because of wrong use of quantifiers. Also try to avoid non-greedy matching (`?`) where possible since it could easily result in [catastrophic backtracking](https://www.regular-expressions.info/catastrophic.html)

Correct:

```python
title = self._search_regex(r'<span\b[^>]+class="title"[^>]*>([^<]+)', webpage, 'title')
```

Incorrect:

```python
title = self._search_regex(r'<span\b.*class="title".*>(.+?)<', webpage, 'title')
```

### Long lines policy

There is a soft limit to keep lines of code under 100 characters long. This means it should be respected if possible and if it does not make readability and code maintenance worse. Sometimes, it may be reasonable to go up to 120 characters and sometimes even 80 can be unreadable. Keep in mind that this is not a hard limit and is just one of many tools to make the code more readable.

For example, you should **never** split long string literals like URLs or some other often copied entities over multiple lines to fit this limit.

Conversely, don't unnecessarily split small lines further. As a rule of thumb, if removing the line split keeps the code under 80 characters, it should be a single line.
##### Examples

Correct:

```python
'https://www.youtube.com/watch?v=FqZTN594JQw&list=PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
```

Incorrect:

```python
'https://www.youtube.com/watch?v=FqZTN594JQw&list='
'PLMYEtVRpaqY00V9W81Cwmzp6N6vZqfUKD4'
```

Correct:

```python
uploader = traverse_obj(info, ('uploader', 'name'), ('author', 'fullname'))
```

Incorrect:

```python
uploader = traverse_obj(
    info, ('uploader', 'name'), ('author', 'fullname'))
```

Correct:

```python
formats = self._extract_m3u8_formats(
    m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls',
    note='Downloading HD m3u8 information', errnote='Unable to download HD m3u8 information')
```

Incorrect:

```python
formats = self._extract_m3u8_formats(m3u8_url,
                                     video_id,
                                     'mp4',
                                     'm3u8_native',
                                     m3u8_id='hls',
                                     note='Downloading HD m3u8 information',
                                     errnote='Unable to download HD m3u8 information')
```

### Quotes

Always use single quotes for strings (even if the string has `'`) and double quotes for docstrings. Use `'''` only for multi-line strings. An exception can be made if a string has multiple single quotes in it and escaping makes it *significantly* harder to read. For f-strings, you can use double quotes on the inside. But avoid f-strings that have too many quotes inside.

### Inline values

Extracting variables is acceptable for reducing code duplication and improving readability of complex expressions. However, you should avoid extracting variables used only once and moving them to opposite parts of the extractor file, which makes reading the linear flow difficult.

#### Examples

Correct:

```python
return {
    'title': self._html_search_regex(r'<h1>([^<]+)</h1>', webpage, 'title'),
    # ...some lines of code...
}
```

Incorrect:

```python
TITLE_RE = r'<h1>([^<]+)</h1>'
# ...some lines of code...
title = self._html_search_regex(TITLE_RE, webpage, 'title')
# ...some lines of code...
return {
    'title': title,
    # ...some lines of code...
}
```

### Collapse fallbacks

Multiple fallback values can quickly become unwieldy. Collapse multiple fallback values into a single expression via a list of patterns.

#### Example

Good:

```python
description = self._html_search_meta(
    ['og:description', 'description', 'twitter:description'],
    webpage, 'description', default=None)
```

Unwieldy:

```python
description = (
    self._og_search_description(webpage, default=None)
    or self._html_search_meta('description', webpage, default=None)
    or self._html_search_meta('twitter:description', webpage, default=None))
```

Methods supporting list of patterns are: `_search_regex`, `_html_search_regex`, `_og_search_property`, `_html_search_meta`.

### Trailing parentheses

Always move trailing parentheses used for grouping/functions after the last argument. On the other hand, a multi-line literal list/tuple/dict/set should be closed in a new line.
Generators and list/dict comprehensions may use either style.

#### Examples

Correct:

```python
url = traverse_obj(info, (
    'context', 'dispatcher', 'stores', 'VideoTitlePageStore', 'data', 'video', 0, 'VideoUrlSet', 'VideoUrl'), list)
```

Correct:

```python
url = traverse_obj(
    info,
    ('context', 'dispatcher', 'stores', 'VideoTitlePageStore', 'data', 'video', 0, 'VideoUrlSet', 'VideoUrl'),
    list)
```

Incorrect:

```python
url = traverse_obj(
    info,
    ('context', 'dispatcher', 'stores', 'VideoTitlePageStore', 'data', 'video', 0, 'VideoUrlSet', 'VideoUrl'),
    list
)
```

Correct:

```python
f = {
    'url': url,
    'format_id': format_id,
}
```

Incorrect:

```python
f = {'url': url,
     'format_id': format_id}
```

Correct:

```python
formats = [process_formats(f) for f in format_data
           if f.get('type') in ('hls', 'dash', 'direct') and f.get('downloadable')]
```

Correct:

```python
formats = [
    process_formats(f) for f in format_data
    if f.get('type') in ('hls', 'dash', 'direct') and f.get('downloadable')
]
```

### Use convenience conversion and parsing functions

Wrap all extracted numeric data into safe functions from [`yt_dlp/utils/`](yt_dlp/utils/): `int_or_none`, `float_or_none`. Use them for string to number conversions as well.

Use `url_or_none` for safe URL processing.

Use `traverse_obj` and `try_call` (supersedes `dict_get` and `try_get`) for safe metadata extraction from parsed JSON.

Use `unified_strdate` for uniform `upload_date` or any `YYYYMMDD` meta field extraction, `unified_timestamp` for uniform `timestamp` extraction, `parse_filesize` for `filesize` extraction, `parse_count` for count meta fields extraction, `parse_resolution`, `parse_duration` for `duration` extraction, `parse_age_limit` for `age_limit` extraction.

Explore [`yt_dlp/utils/`](yt_dlp/utils/) for more useful convenience functions.

#### Examples

```python
description = traverse_obj(response, ('result', 'video', 'summary'), expected_type=str)
thumbnails = traverse_obj(response, ('result', 'thumbnails', ..., 'url'), expected_type=url_or_none)
video = traverse_obj(response, ('result', 'video', 0), default={}, expected_type=dict)
duration = float_or_none(video.get('durationMs'), scale=1000)
view_count = int_or_none(video.get('views'))
```

# My pull request is labeled pending-fixes

The `pending-fixes` label is added when there are changes requested to a PR. When the necessary changes are made, the label should be removed. However, despite our best efforts, it may sometimes happen that the maintainer did not see the changes or forgot to remove the label. If your PR is still marked as `pending-fixes` a few days after all requested changes have been made, feel free to ping the maintainer who labeled your issue and ask them to re-review and remove the label.
# EMBEDDING YT-DLP

See [README.md#embedding-yt-dlp](README.md#embedding-yt-dlp) for instructions on how to embed yt-dlp in another Python program.

yt-dlp-2024.09.27/CONTRIBUTORS

pukkandan (owner) shirt-dev (collaborator) coletdjnz/colethedj (collaborator) Ashish0804 (collaborator) bashonly (collaborator) Grub4K (collaborator) h-h-h-h pauldubois98 nixxo GreyAlien502 kyuyeunk siikamiika jbruchon alexmerkel glenn-slayden Unrud wporr mariuszskon ohnonot samiksome alxnull FelixFrog Zocker1999NET kurumigi bbepis animelover1984/horahoradev Pccode66 RobinD42 hseg DennyDai codeasashu teesid kevinoconnor7 damianoamatruda 2ShedsJackson CXwudi xtkoba llacb47 hheimbuerger B0pol lkho fstirlitz Lamieur tsukumijima Hadi0609 b5eff52 craftingmod tpikonen tripulse king-millez alex-gedeon hhirtz louie-github MinePlayersPE olifre rhsmachine/zenerdi0de nihil-admirari krichbanana ohmybahgosh nyuszika7h blackjack4494 pyx TpmKranz mzbaulhaque zackmark29 mbway zerodytrash wesnm pento rigstot dirkf funniray Jessecar96 jhwgh1968 kikuyan max-te nchilada pgaig PSlava stdedos u-spec-png Sipherdrakon kidonng smege1001 tandy1000 IONECarter capntrips mrfade ParadoxGBB wlritchi NeroBurner mahanstreamer alerikaisattera Derkades BunnyHelp i6t std-move Chocobozzz ouwou korli octotherp CeruleanSky zootedb0t chao813 ChillingPepper ConquerorDopy dalanmiller DigitalDJ f4pp3rk1ng gesa Jules-A makeworld-the-better-one MKSherbini mrx23dot poschi3 raphaeldore renalid sleaux-meaux sulyi tmarki Vangelis66 AjaxGb ajj8 jakubadamw jfogelman timethrow sarnoud Bojidarist 18928172992817182/gustaf nixklai smplayer-dev Zirro CrypticSignal flashdagger fractalf frafra kaz-us ozburo rhendric sdomi selfisekai stanoarn 0xA7404A/Aurora 4a1e2y5 aarubui chio0hai cntrl-s Deer-Spangle DEvmIb Grabien/MaximVol j54vc1bk mpeter50 mrpapersonic pabs3 staubichsauger xenova Yakabuff zulaport ehoogeveen-medweb PilzAdam zmousm iw0nderhow unit193 TwoThousandHedgehogs/KathrynElrod Jertzukka cypheron Hyeeji bwildenhain C0D3D3V kebianizao Lapin0t abdullah-if DavidSkrundz mkubecek raleeper YuenSzeHong Sematre jaller94 r5d julien-hadleyjack git-anony-mouse mdawar trassshhub foghawk k3ns1n teridon mozlima timendum ischmidt20 CreaValix sian1468 arkamar hyano KiberInfinity tejing1 Bricio lazypete365 Aniruddh-J blackgear CplPwnies cyberfox1691 FestplattenSchnitzel hatienl0i261299 iphoting jakeogh lukasfink1 lyz-code marieell mdpauley Mipsters mxmehl ofkz P-reducible pycabbage regarten Ronnnny schn0sch s0u1h MrRawes cffswb danielyli 1-Byte mehq dzek69 aaearon panatexxa kmark un-def goggle Soebb Fam0r bohwaz dodrian vvto33 ca-za connercsbn diegorodriguezv ekangmonyet elyse0 evansp GiedriusS HE7086 JordanWeatherby m4tu4g MarwenDallel nevack putnam rand-net vertan Wikidepia Yipten moench-tegeder christoph-heinrich HobbyistDev LunarFang416 sbor23 aurelg adamanldo gamer191 vkorablin Burve mnn ZhymabekRoman mozbugbox aejdl ping sqrtNOT bubbleguuum darkxex miseran StefanLobbenmeier crazymoose77756 nomevi Brett824 pingiun dosy4ev EhtishamSabir Ferdi265 FirefoxMetzger ftk lamby llamasblade lockmatrix
misaelaguayo odo2063 pritam20ps05 scy sheerluck AxiosDeminence DjesonPV eren-kemer freezboltz Galiley haobinliang Mehavoid winterbird-code yashkc2025 aldoridhoni jacobtruman masta79 palewire cgrigis DavidH-2022 dfaker jackyyf ohaiibuzzle SamantazFox shreyasminocha tejasa97 xenov satan1st 0xGodspeed 5736d79 587021c basrieter Bobscorn CNugteren columndeeply DoubleCouponDay Fabi019 GautamMKGarg itachi-19 jeroenj josanabr LiviaMedeiros nikita-moor snapdgn SuperSonicHub1 tannertechnology Timendum tobi1805 TokyoBlackHole ajayyy Alienmaster bsun0000 changren-wcr ClosedPort22 CrankDatSouljaBoy cruel-efficiency endotronic Generator gibson042 How-Bout-No invertico jahway603 jwoglom lksj megapro17 mlampe MrOctopus nosoop puc9 sashashura schnusch SG5 the-marenga tkgmomosheep vitkhab glensc synthpop123 tntmod54321 milkknife Bnyro CapacitorSet stelcodes skbeh muddi900 digitall chengzhicn mexus JChris246 redraskal Spicadox barsnick docbender KurtBestor Chrissi2812 FrederikNS gschizas JC-Chung mzhou OndrejBakan ab4cbef aionescu amra ByteDream carusocr chexxor felixonmars FrankZ85 FriedrichRehren gregsadetsky LeoniePhiline LowSuggestion912 Matumo OIRNOIR OMEGARAZER oxamun pmitchell86 qbnu qulaz rebane2001 road-master rohieb sdht0 seproDev Hill-98 LXYan2333 mushbite venkata-krishnas 7vlad7 alexklapheke arobase-che bepvte bergoid blmarket brandon-dacrib c-basalt CoryTibbettsDev Cyberes D0LLYNH0 danog DataGhost falbrechtskirchinger foreignBlade garret1317 hasezoey hoaluvn ItzMaxTV ivanskodje jo-nike kangalio linsui makew0rld menschel mikf mrscrapy NDagestad Neurognostic NextFire nick-cd permunkle pzhlkj6612 ringus1 rjy Schmoaaaaah sjthespian theperfectpunk toomyzoom truedread TxI5 unbeatable-101 vampirefrog vidiot720 viktor-enzell zhgwn barthelmannk berkanteber OverlordQ rexlambert22 Ti4eeT4e AmanSal1 bbilly1 meliber nnoboa rdamas RfadnjdExt urectanc nao20010128nao/Lesmiscore 04-pasha-04 aaruni96 aky-01 AmirAflak ApoorvShah111 at-wat davinkevin demon071 denhotte FinnRG fireattack Frankgoji GD-Slime hatsomatt ifan-t kshitiz305 kylegustavo mabdelfattah nathantouze niemands Rajeshwaran2001 RedDeffender Rohxn16 sb0stn SevenLives simon300000 snixon soundchaser128 szabyg trainman261 trislee wader Yalab7 zhallgato zhong-yiyu Zprokkel AS6939 drzraf handlerug jiru madewokherd xofe awalgarg midnightveil naginatana Riteo 1100101 aniolpages bartbroere CrendKing Esokrates HitomaruKonpaku LoserFox peci1 saintliao shubhexists SirElderling almx elivinsky starius TravisDupes amir16yp Fymyte Ganesh910 hashFactory kclauhk Kyraminol lstrojny middlingphys NickCis nicodato prettykool S-Aarab sonmezberkay TSRBerry 114514ns agibson-fl alard alien-developers antonkesy ArnauvGilotra Arthurszzz Bibhav48 Bl4Cc4t boredzo Caesim404 chkuendig chtk Danish-H dasidiot diman8 divStar DmitryScaletta feederbox826 gmes78 gonzalezjo hui1601 infanf jazz1611 jingtra jkmartindale johnvictorfs llistochek marcdumais martinxyz michal-repo mrmedieval nbr23 Nicals Noor-5 NurTasin pompos02 Pranaxcau pwaldhauer RaduManole RalphORama rrgomes ruiminggu rvsit sefidel shmohawk Snack-X src-tinkerer stilor syntaxsurge t-nil ufukk vista-narvas x11x xpadev-net Xpl0itU YoshichikaAAA zhijinwuu alb hruzgar kasper93 leoheitmannruiz luiso1979 nipotan Offert4324 sta1us Tomoka1 trwstin alexhuot1 clienthax DaPotato69 emqi hugohaa imanoreotwe JakeFinley96 lostfictions minamotorin ocococococ Podiumnoche RasmusAntons roeniss shoxie007 Szpachlarz The-MAGI TuxCoder voidful vtexier WyohKnott trueauracoral ASertacAkkaya axpauls chilinux hafeoz JSubelj jucor megumintyan mgedmin 
Niluge-KiWi peisenwang TheZ3ro tippfehlr varunchopra DrakoCpp PatrykMis DinhHuy2010 exterrestris harbhim LeSuisse DunnesH iancmy mokrueger luvyana szantnerb hugepower scribblemaniac Codenade Demon000 Deukhoofd grqz hibes Khaoklong51 kieraneglin lengzuo naglis ndyanx otovalek quad rakslice sahilsinghss73 tony-hn xingchensong

yt-dlp-2024.09.27/Changelog.md

# Changelog <!-- # To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master --> ### 2024.09.27 #### Important changes - **The minimum *recommended* Python version has been raised to 3.9** Since Python 3.8 will reach end-of-life in October 2024, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/10086) #### Core changes - [Allow `none` arg to negate `--convert-subs` and `--convert-thumbnails`](https://github.com/yt-dlp/yt-dlp/commit/c08e0b20b5edd8957b8318716bc14e896d1b96f4) ([#11066](https://github.com/yt-dlp/yt-dlp/issues/11066)) by [kieraneglin](https://github.com/kieraneglin) - [Fix format sorting bug with vp9.2 vcodec](https://github.com/yt-dlp/yt-dlp/commit/8f4ea14680c7865d8ffac10a9174205d1d84ada7) ([#10884](https://github.com/yt-dlp/yt-dlp/issues/10884)) by [rakslice](https://github.com/rakslice) - [Raise minimum recommended Python version to 3.9](https://github.com/yt-dlp/yt-dlp/commit/cca534cd9e6850c70244f225a4a1895ef4bcdbec) ([#11098](https://github.com/yt-dlp/yt-dlp/issues/11098)) by [bashonly](https://github.com/bashonly) - **cookies**: [Improve error message for Windows `--cookies-from-browser chrome` issue](https://github.com/yt-dlp/yt-dlp/commit/b397a64691421ace5df09457c2a764821a2dc6f2) ([#11090](https://github.com/yt-dlp/yt-dlp/issues/11090)) by [seproDev](https://github.com/seproDev) - **utils**: `mimetype2ext`: [Recognize `aacp` as `aac`](https://github.com/yt-dlp/yt-dlp/commit/cc85596d5b59f0c14e9381b3675f619c1e12e597) ([#10860](https://github.com/yt-dlp/yt-dlp/issues/10860)) by [bashonly](https://github.com/bashonly) #### Extractor changes - [Fix JW Player format parsing](https://github.com/yt-dlp/yt-dlp/commit/409f8e9e3b4bde81ef76fc563256f876d2ff8099) ([#10956](https://github.com/yt-dlp/yt-dlp/issues/10956)) by [seproDev](https://github.com/seproDev) - [Handle decode errors when reading responses](https://github.com/yt-dlp/yt-dlp/commit/325001317d97f4545d66fac44c4ba772c6f45f22) ([#10868](https://github.com/yt-dlp/yt-dlp/issues/10868)) by [bashonly](https://github.com/bashonly) - **abc.net.au**: iview, showseries: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7f909046f4dc0fba472b4963145aef6e0d42491b)
([#11101](https://github.com/yt-dlp/yt-dlp/issues/11101)) by [bashonly](https://github.com/bashonly) - **adn**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/cc88a54bb1ef285154775f8a6a413335ce4c71ce) ([#10749](https://github.com/yt-dlp/yt-dlp/issues/10749)) by [infanf](https://github.com/infanf) - **asobistage**: [Support redirected URLs](https://github.com/yt-dlp/yt-dlp/commit/a7d3235c84dac57a127cbe0ff38f7f7c2fdd8fa0) ([#10768](https://github.com/yt-dlp/yt-dlp/issues/10768)) by [pzhlkj6612](https://github.com/pzhlkj6612) - **bandcamp**: user: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5d0176547f16a3642cd71627126e9dfc24981e20) ([#10328](https://github.com/yt-dlp/yt-dlp/issues/10328)) by [bashonly](https://github.com/bashonly), [quad](https://github.com/quad) - **beacon**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b4760c778d0c92c6e3f2bc8346cd72c8f08595ae) ([#9901](https://github.com/yt-dlp/yt-dlp/issues/9901)) by [Deukhoofd](https://github.com/Deukhoofd) - **bilibili** - [Fix chapters and subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/a2000bc85730c950351d78bb818493dc39dca3cb) ([#11099](https://github.com/yt-dlp/yt-dlp/issues/11099)) by [bashonly](https://github.com/bashonly) - [Fix festival URL support](https://github.com/yt-dlp/yt-dlp/commit/b43bd864851f2862e26caa85461c5d825d49d463) ([#10740](https://github.com/yt-dlp/yt-dlp/issues/10740)) by [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz) - **biliintl**: [Fix referer header](https://github.com/yt-dlp/yt-dlp/commit/a06bb586795ebab87a2356923acfc674d6f0e152) ([#11003](https://github.com/yt-dlp/yt-dlp/issues/11003)) by [Khaoklong51](https://github.com/Khaoklong51) - **dropbox**: [Fix password-protected video support](https://github.com/yt-dlp/yt-dlp/commit/63da31b3b29af90062d8a72a905ffe4b5e499042) ([#10735](https://github.com/yt-dlp/yt-dlp/issues/10735)) by [ndyanx](https://github.com/ndyanx) - **ertgr**: [Fix video extraction](https://github.com/yt-dlp/yt-dlp/commit/416686ed0cf792ec44ab059f3b229dd776077e14) ([#11091](https://github.com/yt-dlp/yt-dlp/issues/11091)) by [seproDev](https://github.com/seproDev) - **eurosport**: [Support local URL variants](https://github.com/yt-dlp/yt-dlp/commit/f0bb28504c8c2b75ee3e5796aed50de2a7f90a1b) ([#10785](https://github.com/yt-dlp/yt-dlp/issues/10785)) by [seproDev](https://github.com/seproDev) - **facebook** - ads: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d62fef7e07d454c0d2ba2d69fb96d691dba1ded0) ([#10704](https://github.com/yt-dlp/yt-dlp/issues/10704)) by [kclauhk](https://github.com/kclauhk) - reel: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/0e1b941c6b2caa688b0d3332e723d16dbafa4311) by [lengzuo](https://github.com/lengzuo) - **germanupa**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/124f058b546d652a359c67025bb479789bfbef0b) ([#10538](https://github.com/yt-dlp/yt-dlp/issues/10538)) by [grqz](https://github.com/grqz) - **hgtvde**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a555389c9bb32e589e00b4664974423fb7b04dcd) ([#10992](https://github.com/yt-dlp/yt-dlp/issues/10992)) by [bashonly](https://github.com/bashonly), [rdamas](https://github.com/rdamas) - **huya**: video: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/25c1cdaa2650563494d3bf00a38f72d0d9486bff) ([#10686](https://github.com/yt-dlp/yt-dlp/issues/10686)) by [hugepower](https://github.com/hugepower) - **iprima**: [Fix zoom URL 
support](https://github.com/yt-dlp/yt-dlp/commit/4a27b8f092f7f7c10b7a334d3535c97c2af02f0a) ([#10959](https://github.com/yt-dlp/yt-dlp/issues/10959)) by [otovalek](https://github.com/otovalek) - **khanacademy**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0fba08485b6445b72b5b63ae23ca2a73fa5d967f) ([#10913](https://github.com/yt-dlp/yt-dlp/issues/10913)) by [seproDev](https://github.com/seproDev) - **kick** - clips: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/0aa4426e9a35f7f8e184f1f2082b3b313c1448f7) ([#11107](https://github.com/yt-dlp/yt-dlp/issues/11107)) by [bashonly](https://github.com/bashonly) - vod: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/173d54c151b987409e3eb09552d8d89ed8fc50f7) ([#10988](https://github.com/yt-dlp/yt-dlp/issues/10988)) by [bashonly](https://github.com/bashonly), [grqz](https://github.com/grqz) - **kika**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e6f48ca80821939c1fd11ec2a0cdbf2fba9b258a) ([#5788](https://github.com/yt-dlp/yt-dlp/issues/5788)) by [1100101](https://github.com/1100101) - **lnkgo**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/fa83d0b36bc43d30fe9241c1e923f4614864b758) ([#10904](https://github.com/yt-dlp/yt-dlp/issues/10904)) by [naglis](https://github.com/naglis) - **loom**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/7509d692b37a7ec6230ea75bfe1e44a8de5eefce) ([#10760](https://github.com/yt-dlp/yt-dlp/issues/10760)) by [kclauhk](https://github.com/kclauhk) - **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/e2b3634e299be9c16a247ece3b1858d83889c324) ([#11083](https://github.com/yt-dlp/yt-dlp/issues/11083)) by [szantnerb](https://github.com/szantnerb) - **mojevideo**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/28b0ecba2af5b4919f198474b3d00a76ef322c31) ([#11019](https://github.com/yt-dlp/yt-dlp/issues/11019)) by [04-pasha-04](https://github.com/04-pasha-04), [pzhlkj6612](https://github.com/pzhlkj6612) - **niconico**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/eabb4680fdb09ba1f48d174a700a2e3b43f82add) ([#11103](https://github.com/yt-dlp/yt-dlp/issues/11103)) by [bashonly](https://github.com/bashonly) - **nzz**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4a9bc8c3630378bc29f0266126b503f6190c0430) ([#10461](https://github.com/yt-dlp/yt-dlp/issues/10461)) by [1-Byte](https://github.com/1-Byte) - **patreoncampaign**: [Support API URLs](https://github.com/yt-dlp/yt-dlp/commit/232e6db30c474d1b387e405342f34173ceeaf832) ([#10734](https://github.com/yt-dlp/yt-dlp/issues/10734)) by [bashonly](https://github.com/bashonly), [hibes](https://github.com/hibes) - **pinterest**: [Extend `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c8c078fe28b0ffc15ef9646346c00c592fe71a78) ([#10867](https://github.com/yt-dlp/yt-dlp/issues/10867)) by [bashonly](https://github.com/bashonly), [sahilsinghss73](https://github.com/sahilsinghss73) - **radiko**: [Extract unique `id` values](https://github.com/yt-dlp/yt-dlp/commit/c8d096c5ce111411fbdbe2abb8fed54f317a6182) ([#10726](https://github.com/yt-dlp/yt-dlp/issues/10726)) by [garret1317](https://github.com/garret1317) - **rtp**: [Support more subpages](https://github.com/yt-dlp/yt-dlp/commit/d02df303d8e49390599db9f34482697e4d1cf5b2) ([#10787](https://github.com/yt-dlp/yt-dlp/issues/10787)) by [Demon000](https://github.com/Demon000) - **rumblechannel**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/ad0b857f459a6d390fbf124183916218c52f223a) 
([#11049](https://github.com/yt-dlp/yt-dlp/issues/11049)) by [tony-hn](https://github.com/tony-hn) - **rutube**: [Support livestreams](https://github.com/yt-dlp/yt-dlp/commit/41be32e78c3845000dbac188ffb90ea3ea7c4dfa) ([#10844](https://github.com/yt-dlp/yt-dlp/issues/10844)) by [pzhlkj6612](https://github.com/pzhlkj6612) - **samplefocus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/46f4c80bc363ee8116c33d37f65202e6c3470954) ([#10947](https://github.com/yt-dlp/yt-dlp/issues/10947)) by [seproDev](https://github.com/seproDev) - **screenrec**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/36f9e602ad55679764bc75a4f67f7562b1d6adcf) ([#10917](https://github.com/yt-dlp/yt-dlp/issues/10917)) by [naglis](https://github.com/naglis) - **sen**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/41a241ca6ffb95b3d9aaf4f42106ca8cba9af1a6) ([#10952](https://github.com/yt-dlp/yt-dlp/issues/10952)) by [seproDev](https://github.com/seproDev) - **servus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/300c91274f7ea5b1b0528fc5ee11cf1a61d4079e) ([#10944](https://github.com/yt-dlp/yt-dlp/issues/10944)) by [seproDev](https://github.com/seproDev) - **snapchatspotlight**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b37417e4f934fd8909788b493d017777155b0ae5) ([#11030](https://github.com/yt-dlp/yt-dlp/issues/11030)) by [seproDev](https://github.com/seproDev) - **svtpage**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5a8a05aebb49693e78e1123015837ed5e961ff76) ([#11010](https://github.com/yt-dlp/yt-dlp/issues/11010)) by [diman8](https://github.com/diman8) - **tenplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d8d473002b654ab0e7b97ead869f58b4361eeae1) ([#10928](https://github.com/yt-dlp/yt-dlp/issues/10928)) by [aarubui](https://github.com/aarubui) - **tiktok**: [Fix web formats extraction](https://github.com/yt-dlp/yt-dlp/commit/3ad0b7f422d547204df687b6d0b2d9110fff3990) ([#11074](https://github.com/yt-dlp/yt-dlp/issues/11074)) by [bashonly](https://github.com/bashonly) - **twitter**: spaces: [Support video spaces](https://github.com/yt-dlp/yt-dlp/commit/bef1d4d6fc9493fda7f75e2289c07c507d10092f) ([#10789](https://github.com/yt-dlp/yt-dlp/issues/10789)) by [bashonly](https://github.com/bashonly) - **vidflex**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e978c312d6550a6ae4c9df18001afb1b420cb72f) ([#10002](https://github.com/yt-dlp/yt-dlp/issues/10002)) by [pzhlkj6612](https://github.com/pzhlkj6612) - **vimeo** - [Always try to extract original format](https://github.com/yt-dlp/yt-dlp/commit/4115c24d157c5b5f63089d75c4e0f51d1f8b4489) ([#10721](https://github.com/yt-dlp/yt-dlp/issues/10721)) by [bashonly](https://github.com/bashonly) (With fixes in [e8e6a98](https://github.com/yt-dlp/yt-dlp/commit/e8e6a982a1b659eed434d225d7922f632bac6568) by [seproDev](https://github.com/seproDev)) - [Fix HLS audio format sorting](https://github.com/yt-dlp/yt-dlp/commit/a1b4ac2b8ed8e6eaa56044d439f1e0d00c2ba218) ([#11082](https://github.com/yt-dlp/yt-dlp/issues/11082)) by [fireattack](https://github.com/fireattack) - **watchespn**: [Improve auth support](https://github.com/yt-dlp/yt-dlp/commit/7adff8caf152dcf96d03aff69ed8545c0a63567c) ([#10910](https://github.com/yt-dlp/yt-dlp/issues/10910)) by [ischmidt20](https://github.com/ischmidt20) - **wistia**: [Support password-protected videos](https://github.com/yt-dlp/yt-dlp/commit/9f5c9a90898c5a1e672922d9cd799716c73cee34) ([#11100](https://github.com/yt-dlp/yt-dlp/issues/11100)) by 
[bashonly](https://github.com/bashonly) - **ximalaya**: [Add VIP support](https://github.com/yt-dlp/yt-dlp/commit/3dfd720d098b4d49d69cfc77e6376f22bcd90934) ([#10832](https://github.com/yt-dlp/yt-dlp/issues/10832)) by [seproDev](https://github.com/seproDev), [xingchensong](https://github.com/xingchensong) - **xinpianchang**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3aa0156e05662923d130ddbc1c82596e38c01a00) ([#10950](https://github.com/yt-dlp/yt-dlp/issues/10950)) by [seproDev](https://github.com/seproDev) - **yleareena**: [Support podcasts](https://github.com/yt-dlp/yt-dlp/commit/48d629d461e05b1b19f5e53dc959bb9ebe95da42) ([#11104](https://github.com/yt-dlp/yt-dlp/issues/11104)) by [bashonly](https://github.com/bashonly) - **youtube** - [Add `po_token`, `visitor_data`, `data_sync_id` extractor args](https://github.com/yt-dlp/yt-dlp/commit/3a3bd00037e9908e87da4fa9f2ad772aa34dc60e) ([#10648](https://github.com/yt-dlp/yt-dlp/issues/10648)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [seproDev](https://github.com/seproDev) (With fixes in [fa2be9a](https://github.com/yt-dlp/yt-dlp/commit/fa2be9a7c63babede07480151363e54eee5702bd) by [bashonly](https://github.com/bashonly)) - [Support excluding `player_client`s in extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/49f3741a820ed142f6866317c2e7d247b130960e) ([#10710](https://github.com/yt-dlp/yt-dlp/issues/10710)) by [bashonly](https://github.com/bashonly) - clip: [Prioritize `https` formats](https://github.com/yt-dlp/yt-dlp/commit/1d84b780cf33a1d84756825ac23f990a905703df) ([#11102](https://github.com/yt-dlp/yt-dlp/issues/11102)) by [bashonly](https://github.com/bashonly) - tab: [Fix shorts tab extraction](https://github.com/yt-dlp/yt-dlp/commit/9431777b4c37129a6093080c77ca59960afbb9d7) ([#10938](https://github.com/yt-dlp/yt-dlp/issues/10938)) by [seproDev](https://github.com/seproDev) #### Networking changes - [Fix handler not being added to RequestError](https://github.com/yt-dlp/yt-dlp/commit/d1c4d88b2d912e8da5e76db455562ca63b1af690) ([#10955](https://github.com/yt-dlp/yt-dlp/issues/10955)) by [coletdjnz](https://github.com/coletdjnz) - [Pin `curl-cffi` version to < 0.7.2](https://github.com/yt-dlp/yt-dlp/commit/5bb1aa04dafce13ba9de707ea53169fab58b5207) ([#11092](https://github.com/yt-dlp/yt-dlp/issues/11092)) by [bashonly](https://github.com/bashonly) - **Request Handler**: websockets: [Upgrade websockets to 13.0](https://github.com/yt-dlp/yt-dlp/commit/6f9e6537434562d513d0c9b68ced8a61ade94a64) ([#10815](https://github.com/yt-dlp/yt-dlp/issues/10815)) by [coletdjnz](https://github.com/coletdjnz) #### Misc. 
changes - **build** - [Bump PyInstaller version pin to `>=6.10.0`](https://github.com/yt-dlp/yt-dlp/commit/fb8b7f226d251e521a89b23c415e249e5b788e5c) ([#10709](https://github.com/yt-dlp/yt-dlp/issues/10709)) by [bashonly](https://github.com/bashonly) - [Pin `delocate` version for `macos`](https://github.com/yt-dlp/yt-dlp/commit/7e41628ff523b3fe373b0981a5db441358980dab) ([#10901](https://github.com/yt-dlp/yt-dlp/issues/10901)) by [bashonly](https://github.com/bashonly) - **ci** - [Add comment sanitization workflow](https://github.com/yt-dlp/yt-dlp/commit/b6200bdcf3a9415ae36859188f9a57e3e461c696) ([#10915](https://github.com/yt-dlp/yt-dlp/issues/10915)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) - [Add issue tracker anti-spam protection](https://github.com/yt-dlp/yt-dlp/commit/ad9a8115aa29a1a95c961b16fcf129a228d98f50) ([#10861](https://github.com/yt-dlp/yt-dlp/issues/10861)) by [bashonly](https://github.com/bashonly) - **cleanup**: Miscellaneous: [c6387ab](https://github.com/yt-dlp/yt-dlp/commit/c6387abc1af9842bb0541288a5610abba9b1ab51) by [bashonly](https://github.com/bashonly), [Codenade](https://github.com/Codenade), [coletdjnz](https://github.com/coletdjnz), [grqz](https://github.com/grqz), [Grub4K](https://github.com/Grub4K), [pzhlkj6612](https://github.com/pzhlkj6612), [seproDev](https://github.com/seproDev) ### 2024.08.06 #### Core changes - **jsinterp**: [Improve `slice` implementation](https://github.com/yt-dlp/yt-dlp/commit/bb8bf1db993f59752d20b73b861bd55e40cf0e31) ([#10664](https://github.com/yt-dlp/yt-dlp/issues/10664)) by [seproDev](https://github.com/seproDev) #### Extractor changes - **discoveryplusitaly**: [Support sport and olympics URLs](https://github.com/yt-dlp/yt-dlp/commit/e7d73bc4531ee3f91a46b15e218dcc1fbeb6226c) ([#10655](https://github.com/yt-dlp/yt-dlp/issues/10655)) by [bashonly](https://github.com/bashonly) - **gem.cbc.ca**: live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/fc5eecfa31c9571b6031cc3968aaa0394be55d7a) ([#10565](https://github.com/yt-dlp/yt-dlp/issues/10565)) by [bashonly](https://github.com/bashonly), [scribblemaniac](https://github.com/scribblemaniac) - **niconico**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4d9231208332d4c32364b8cd814bff8b20232cae) ([#10677](https://github.com/yt-dlp/yt-dlp/issues/10677)) by [bashonly](https://github.com/bashonly) - **olympics**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/919540a9644e55deb78cdd6751757ec8fdaf76f4) ([#10625](https://github.com/yt-dlp/yt-dlp/issues/10625)) by [bashonly](https://github.com/bashonly) - **youku**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0088c6de23d832b117061a33e984dc452d992e9c) ([#10626](https://github.com/yt-dlp/yt-dlp/issues/10626)) by [hugepower](https://github.com/hugepower) - **youtube** - [Change default player clients to `ios,web_creator`](https://github.com/yt-dlp/yt-dlp/commit/406f4c2e47502fffc1b0c210b4ee6487c89a44cb) ([#10674](https://github.com/yt-dlp/yt-dlp/issues/10674)) by [bashonly](https://github.com/bashonly) - [Fix `n` function name extraction for player `b12cc44b`](https://github.com/yt-dlp/yt-dlp/commit/c86891eb9434b4d7eec426d38c0c625b5e13cb2f) ([#10668](https://github.com/yt-dlp/yt-dlp/issues/10668)) by [seproDev](https://github.com/seproDev) ### 2024.08.01 #### Core changes - **utils**: `unified_timestamp`: [Recognize Sunday](https://github.com/yt-dlp/yt-dlp/commit/6daf2c27c0464fba98337be30de0b66d520d0db1) ([#10589](https://github.com/yt-dlp/yt-dlp/issues/10589)) by 
[bashonly](https://github.com/bashonly) #### Extractor changes - **abematv**: [Fix availability extraction](https://github.com/yt-dlp/yt-dlp/commit/ef36d517f9b05785d61abca7691d9ab7d63cc75c) ([#10569](https://github.com/yt-dlp/yt-dlp/issues/10569)) by [middlingphys](https://github.com/middlingphys) - **cbc.ca**: player: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/94a1c5e642e468cebeb51f74c6c220434cb47d96) ([#10302](https://github.com/yt-dlp/yt-dlp/issues/10302)) by [bashonly](https://github.com/bashonly), [trainman261](https://github.com/trainman261) - **discoveryplus**: [Support olympics URLs](https://github.com/yt-dlp/yt-dlp/commit/0b7728618417e1aa382722a4d29b916b594d4459) ([#10566](https://github.com/yt-dlp/yt-dlp/issues/10566)) by [bashonly](https://github.com/bashonly) - **kick**: clips: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/bb3936ae2b3ce96d0b53f9e17cad1082058f032b) ([#10572](https://github.com/yt-dlp/yt-dlp/issues/10572)) by [luvyana](https://github.com/luvyana) - **learningonscreen**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/fe15d3178e242803ae7a934b90137f13598eba2e) ([#10590](https://github.com/yt-dlp/yt-dlp/issues/10590)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) - **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7e3e4779ad13e4511c9ba3869879e53f0267bd7a) ([#10605](https://github.com/yt-dlp/yt-dlp/issues/10605)) by [szantnerb](https://github.com/szantnerb) - **mlbtv**: [Fix makeup game extraction](https://github.com/yt-dlp/yt-dlp/commit/4b69e1b53ea21e631cd5dd68ff531e2f1671ec17) ([#10607](https://github.com/yt-dlp/yt-dlp/issues/10607)) by [bashonly](https://github.com/bashonly) - **olympics**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2f1ddfe12a2c174bc777264c5c8ffe7ca0922d94) ([#10604](https://github.com/yt-dlp/yt-dlp/issues/10604)) by [bashonly](https://github.com/bashonly) - **tva**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/28d485714fef88937c82635438afba5db81f9089) ([#10567](https://github.com/yt-dlp/yt-dlp/issues/10567)) by [bashonly](https://github.com/bashonly) - **tver**: [Support olympic URLs](https://github.com/yt-dlp/yt-dlp/commit/5260696b1cba77161828941fdb38f09f14ac6c60) ([#10600](https://github.com/yt-dlp/yt-dlp/issues/10600)) by [vvto33](https://github.com/vvto33) - **vimeo**: review: [Fix password-protected video extraction](https://github.com/yt-dlp/yt-dlp/commit/2b6df93a243bdfb9d6bb5c1e18020625cd02d465) ([#10598](https://github.com/yt-dlp/yt-dlp/issues/10598)) by [bashonly](https://github.com/bashonly) - **youtube** - [Change default player clients to `ios,tv`](https://github.com/yt-dlp/yt-dlp/commit/efb42763dec23ccf6a2e3bac3afbfefce8efd012) ([#10457](https://github.com/yt-dlp/yt-dlp/issues/10457)) by [seproDev](https://github.com/seproDev) - [Fix `n` function name extraction for player `20dfca59`](https://github.com/yt-dlp/yt-dlp/commit/011b4a04db2a636c3ef0a0ad4e2d3ae482c9fd76) ([#10611](https://github.com/yt-dlp/yt-dlp/issues/10611)) by [bashonly](https://github.com/bashonly) - [Fix age-verification workaround](https://github.com/yt-dlp/yt-dlp/commit/d19fcb934269465fd707e68a87f735ec6983e93d) ([#10610](https://github.com/yt-dlp/yt-dlp/issues/10610)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) - [Player client maintenance](https://github.com/yt-dlp/yt-dlp/commit/0e539617a41913c7da1edd74fb6543c10ad727b3) ([#10573](https://github.com/yt-dlp/yt-dlp/issues/10573)) by [bashonly](https://github.com/bashonly) 
#### Misc. changes - **cleanup**: Miscellaneous: [ffd7781](https://github.com/yt-dlp/yt-dlp/commit/ffd7781d6588926f820b44a34b9e6e3068fb9f97) by [bashonly](https://github.com/bashonly) ### 2024.07.25 #### Extractor changes - **abematv**: [Adapt key retrieval to request handler framework](https://github.com/yt-dlp/yt-dlp/commit/a3bab4752a2b3d56e5a59b4e0411bb8f695c010b) ([#10491](https://github.com/yt-dlp/yt-dlp/issues/10491)) by [bashonly](https://github.com/bashonly) - **facebook**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1a34a802f44a1dab8f642c79c3cc810e21541d3b) ([#10531](https://github.com/yt-dlp/yt-dlp/issues/10531)) by [bashonly](https://github.com/bashonly) - **mlbtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f0993391e6052ec8f7aacc286609564f226943b9) ([#10515](https://github.com/yt-dlp/yt-dlp/issues/10515)) by [bashonly](https://github.com/bashonly) - **tiktok**: [Fix and deprioritize JSON subtitles](https://github.com/yt-dlp/yt-dlp/commit/2f97779f335ac069ecccd9c7bf81abf4a83cfe7a) ([#10516](https://github.com/yt-dlp/yt-dlp/issues/10516)) by [bashonly](https://github.com/bashonly) - **vimeo**: [Fix chapters extraction](https://github.com/yt-dlp/yt-dlp/commit/a0a1bc3d8d8e3bb9a48a06e835815a0460e90e77) ([#10544](https://github.com/yt-dlp/yt-dlp/issues/10544)) by [bashonly](https://github.com/bashonly) - **youtube**: [Fix `n` function name extraction for player `3400486c`](https://github.com/yt-dlp/yt-dlp/commit/713b4cd18f00556771af8cfdd9cea6cc1a09e948) ([#10542](https://github.com/yt-dlp/yt-dlp/issues/10542)) by [bashonly](https://github.com/bashonly) #### Misc. changes - **build**: [Pin `setuptools` version](https://github.com/yt-dlp/yt-dlp/commit/e046db8a116b1c320d4785daadd48ea0b22a3987) ([#10493](https://github.com/yt-dlp/yt-dlp/issues/10493)) by [bashonly](https://github.com/bashonly) ### 2024.07.16 #### Core changes - [Fix `noprogress` if `test=True` with `--quiet` and `--verbose`](https://github.com/yt-dlp/yt-dlp/commit/66ce3d76d87af3f81cc9dfec4be4704016cb1cdb) ([#10454](https://github.com/yt-dlp/yt-dlp/issues/10454)) by [Grub4K](https://github.com/Grub4K) - [Support `auto-tty` and `no_color-tty` for `--color`](https://github.com/yt-dlp/yt-dlp/commit/d9cbced493cae2008508d94a2db5dd98be7c01fc) ([#10453](https://github.com/yt-dlp/yt-dlp/issues/10453)) by [Grub4K](https://github.com/Grub4K) - **update**: [Fix network error handling](https://github.com/yt-dlp/yt-dlp/commit/ed1b9ed93dd90d2cc960c0d8eaa9d919db224203) ([#10486](https://github.com/yt-dlp/yt-dlp/issues/10486)) by [bashonly](https://github.com/bashonly) - **utils**: `parse_codecs`: [Fix parsing of mixed case codec strings](https://github.com/yt-dlp/yt-dlp/commit/cc0070f6496e501d77352bad475fb02d6a86846a) by [bashonly](https://github.com/bashonly) #### Extractor changes - **adn**: [Adjust for .com domain change](https://github.com/yt-dlp/yt-dlp/commit/959b7a379b8e5da059d110a63339c964b6265736) ([#10399](https://github.com/yt-dlp/yt-dlp/issues/10399)) by [infanf](https://github.com/infanf) - **afreecatv**: [Fix login and use `legacy_ssl`](https://github.com/yt-dlp/yt-dlp/commit/4cd41469243624d90b7a2009b95cbe0609343efe) ([#10440](https://github.com/yt-dlp/yt-dlp/issues/10440)) by [bashonly](https://github.com/bashonly) - **box**: [Support enterprise URLs](https://github.com/yt-dlp/yt-dlp/commit/705f5b84dec75cc7af97f42fd1530e8062735970) ([#10419](https://github.com/yt-dlp/yt-dlp/issues/10419)) by [seproDev](https://github.com/seproDev) - **digitalconcerthall**: [Extract HEVC and FLAC 
formats](https://github.com/yt-dlp/yt-dlp/commit/e62fa6b0e0186f8c5666c2c5ab64cf191abdafc1) ([#10470](https://github.com/yt-dlp/yt-dlp/issues/10470)) by [bashonly](https://github.com/bashonly) - **dplay**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/39e6c4cb44b9292e89ac0afec3cd0afc2ae8775f) ([#10471](https://github.com/yt-dlp/yt-dlp/issues/10471)) by [bashonly](https://github.com/bashonly) - **epidemicsound**: [Support sound effects URLs](https://github.com/yt-dlp/yt-dlp/commit/8531d2b03bac9cc746f2ee8098aaf8f115505f5b) ([#10436](https://github.com/yt-dlp/yt-dlp/issues/10436)) by [iancmy](https://github.com/iancmy) - **generic**: [Fix direct video link extensions](https://github.com/yt-dlp/yt-dlp/commit/b9afb99e7c34d0eb15ddc6689cd7d20eebfda68e) ([#10468](https://github.com/yt-dlp/yt-dlp/issues/10468)) by [bashonly](https://github.com/bashonly) - **picarto**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/bacd18b7df08b4995644fd12cee1f8c8e8636bc7) ([#10414](https://github.com/yt-dlp/yt-dlp/issues/10414)) by [Frankgoji](https://github.com/Frankgoji) - **soundcloud**: permalink, user: [Extract tracks only](https://github.com/yt-dlp/yt-dlp/commit/22870b81bad97dfa6307a7add44753b2dffc76a9) ([#10463](https://github.com/yt-dlp/yt-dlp/issues/10463)) by [DunnesH](https://github.com/DunnesH) - **tiktok**: live: [Fix room ID extraction](https://github.com/yt-dlp/yt-dlp/commit/d2189d3d36987ebeac426fd70a60a5fe86325a2b) ([#10408](https://github.com/yt-dlp/yt-dlp/issues/10408)) by [mokrueger](https://github.com/mokrueger) - **tv5monde**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/9b95a6765a5f6325af99c4aca961587f0c426e8c) ([#10417](https://github.com/yt-dlp/yt-dlp/issues/10417)) by [bashonly](https://github.com/bashonly) (With fixes in [cc1a309](https://github.com/yt-dlp/yt-dlp/commit/cc1a3098c00995c6aebc2a16bd1050a66bad64db)) - **youtube** - [Avoid poToken experiment player responses](https://github.com/yt-dlp/yt-dlp/commit/8b8b442cb005a8d85315f301615f83fb736b967a) ([#10456](https://github.com/yt-dlp/yt-dlp/issues/10456)) by [seproDev](https://github.com/seproDev) (With fixes in [16da8ef](https://github.com/yt-dlp/yt-dlp/commit/16da8ef9937ff76632dfef02e5062c5ba99c8ea2)) - [Invalidate nsig cache from < 2024.07.09](https://github.com/yt-dlp/yt-dlp/commit/04e17ba20a139f1b3e30ec4bafa3fba26888f0b3) ([#10401](https://github.com/yt-dlp/yt-dlp/issues/10401)) by [bashonly](https://github.com/bashonly) - [Reduce android client priority](https://github.com/yt-dlp/yt-dlp/commit/b85eef0a615a01304f88a3847309c667e09a20df) ([#10467](https://github.com/yt-dlp/yt-dlp/issues/10467)) by [seproDev](https://github.com/seproDev) #### Networking changes - [Add `legacy_ssl` request extension](https://github.com/yt-dlp/yt-dlp/commit/150ecc45d9cacc919550c13b04fd998ac5103a6b) ([#10448](https://github.com/yt-dlp/yt-dlp/issues/10448)) by [coletdjnz](https://github.com/coletdjnz) - **Request Handler**: curl_cffi: [Support `curl_cffi` 0.7.X](https://github.com/yt-dlp/yt-dlp/commit/42bfca00a6b460fc053514cdd7ac6f5b5daddf0c) by [coletdjnz](https://github.com/coletdjnz) #### Misc. 
changes - **build** - [Include `curl_cffi` in `yt-dlp_linux`](https://github.com/yt-dlp/yt-dlp/commit/4521f30d1479315cd5c3bf4abdad19391952df98) by [bashonly](https://github.com/bashonly) - [Pin `curl-cffi` to 0.5.10 for Windows](https://github.com/yt-dlp/yt-dlp/commit/ac30941ae682f71eab010877c9a977736a61d3cf) by [bashonly](https://github.com/bashonly) - **cleanup**: Miscellaneous: [89a161e](https://github.com/yt-dlp/yt-dlp/commit/89a161e8c62569a662deda1c948664152efcb6b4) by [bashonly](https://github.com/bashonly) ### 2024.07.09 #### Core changes - [Do not alter default format selection when simulated](https://github.com/yt-dlp/yt-dlp/commit/0b570f2a90ce2363ba06089217514d644e7be2e0) ([#9862](https://github.com/yt-dlp/yt-dlp/issues/9862)) by [seproDev](https://github.com/seproDev) #### Extractor changes - **youtube**: [Remove broken `n` function extraction fallback](https://github.com/yt-dlp/yt-dlp/commit/7ead7332af69422cee931aec3faa277288e9e212) ([#10396](https://github.com/yt-dlp/yt-dlp/issues/10396)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) ### 2024.07.08 #### Core changes - **jsinterp**: [Implement `Function.prototype` resolving for `call` and `apply`](https://github.com/yt-dlp/yt-dlp/commit/6c056ea7aeb03660281653a9668547f2548f194f) ([#10392](https://github.com/yt-dlp/yt-dlp/issues/10392)) by [Grub4K](https://github.com/Grub4K) #### Extractor changes - **soundcloud**: [Fix rate-limit handling](https://github.com/yt-dlp/yt-dlp/commit/4b50b292cc98534fb8c7cdf0ae5cb85862f7ebfc) ([#10389](https://github.com/yt-dlp/yt-dlp/issues/10389)) by [bashonly](https://github.com/bashonly) - **youtube**: [Fix JS `n` function name extraction](https://github.com/yt-dlp/yt-dlp/commit/297b0a379282a15c80d82d51f3757c961db2dae1) ([#10390](https://github.com/yt-dlp/yt-dlp/issues/10390)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev) ### 2024.07.07 #### Important changes - Security: [[ie/douyutv] Do not use dangerous javascript source/URL](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3v33-3wmw-3785) - A dependency on potentially malicious third-party JavaScript code has been removed from the Douyu extractors #### Core changes - [Address gaps in allowed extensions](https://github.com/yt-dlp/yt-dlp/commit/2469119490d7e0397ebbf5c5ae327316f955eef2) ([#10362](https://github.com/yt-dlp/yt-dlp/issues/10362)) by [bashonly](https://github.com/bashonly) - [Fix `--ignore-no-formats-error`](https://github.com/yt-dlp/yt-dlp/commit/cc767e9490056efaaa11c186b0d032e4b4969180) ([#10345](https://github.com/yt-dlp/yt-dlp/issues/10345)) by [Grub4K](https://github.com/Grub4K) #### Extractor changes - **abematv**: [Extract availability](https://github.com/yt-dlp/yt-dlp/commit/2a1a1b8e67e864289ac7ba5d05ec63dbb19a639f) ([#10348](https://github.com/yt-dlp/yt-dlp/issues/10348)) by [middlingphys](https://github.com/middlingphys) - **chzzk**: [Extract with API v3](https://github.com/yt-dlp/yt-dlp/commit/4862a29854d4044120e3f97b52199711ad04bee1) ([#10363](https://github.com/yt-dlp/yt-dlp/issues/10363)) by [hui1601](https://github.com/hui1601) - **douyutv**: [Do not use dangerous javascript source/URL](https://github.com/yt-dlp/yt-dlp/commit/6075a029dba70a89675ae1250e7cdfd91f0eba41) ([#10347](https://github.com/yt-dlp/yt-dlp/issues/10347)) by [LeSuisse](https://github.com/LeSuisse) - **jiosaavn**: playlist: [Support featured playlists](https://github.com/yt-dlp/yt-dlp/commit/f0f867f008a1728f5f6ac1224b9e014b5d27f817) 
([#10382](https://github.com/yt-dlp/yt-dlp/issues/10382)) by [harbhim](https://github.com/harbhim) - **vidyard**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/00766ece0c5c7a80781a4ff677198c5fb69d9dc0) ([#10155](https://github.com/yt-dlp/yt-dlp/issues/10155)) by [exterrestris](https://github.com/exterrestris) - **vimeo**: [Fix password-protected video extraction](https://github.com/yt-dlp/yt-dlp/commit/c1c9bb4adb42d0d93a2fb5d93a7de0a87b6ba884) ([#10341](https://github.com/yt-dlp/yt-dlp/issues/10341)) by [bashonly](https://github.com/bashonly) - **vtv**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/987a1f94c24275f2b0cd82e719956687415dd732) ([#10173](https://github.com/yt-dlp/yt-dlp/issues/10173)) by [DinhHuy2010](https://github.com/DinhHuy2010) - **yle_areena** - [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/4cdc976bd861b5835601ae402bef543eacd88f3d) ([#10380](https://github.com/yt-dlp/yt-dlp/issues/10380)) by [seproDev](https://github.com/seproDev) - [Fix subtitle extraction](https://github.com/yt-dlp/yt-dlp/commit/0d174e8bed32081eb38ef7f5d1a1282ae154f517) ([#10379](https://github.com/yt-dlp/yt-dlp/issues/10379)) by [Grub4K](https://github.com/Grub4K) #### Misc. changes - **cleanup**: Miscellaneous: [b337d29](https://github.com/yt-dlp/yt-dlp/commit/b337d2989ce0614651d363383f6f743d977248ef) by [bashonly](https://github.com/bashonly) ### 2024.07.02 #### Core changes - [Fix `--compat-opt allow-unsafe-ext`](https://github.com/yt-dlp/yt-dlp/commit/773bbb181506856ffda95496ab60c1c9603f1f71) ([#10336](https://github.com/yt-dlp/yt-dlp/issues/10336)) by [bashonly](https://github.com/bashonly), [rdamas](https://github.com/rdamas) #### Extractor changes - **banbye**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7509791385ba88cb7ec0ab17e826681f4af4b66e) ([#10332](https://github.com/yt-dlp/yt-dlp/issues/10332)) by [PatrykMis](https://github.com/PatrykMis), [seproDev](https://github.com/seproDev) - **murrtube**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6403530e2dfe259a87afe444708c4f3024cc45b8) ([#9249](https://github.com/yt-dlp/yt-dlp/issues/9249)) by [DrakoCpp](https://github.com/DrakoCpp) - **zaiko**: [Support JWT video URLs](https://github.com/yt-dlp/yt-dlp/commit/7799e518956387bb3c1064c9beae26eab8d5044a) ([#10130](https://github.com/yt-dlp/yt-dlp/issues/10130)) by [pzhlkj6612](https://github.com/pzhlkj6612) #### Postprocessor changes - **embedthumbnail**: [Fix embedding with mutagen](https://github.com/yt-dlp/yt-dlp/commit/d502f4c6d95b74896f40070d07229997f0850f31) ([#10337](https://github.com/yt-dlp/yt-dlp/issues/10337)) by [bashonly](https://github.com/bashonly) #### Misc. 
changes - **cleanup**: Miscellaneous: [93d33cb](https://github.com/yt-dlp/yt-dlp/commit/93d33cb29af9e2e84369ac43589d50ce8e0160ef) by [bashonly](https://github.com/bashonly) ### 2024.07.01 #### Important changes - Security: [[CVE-2024-38519](https://nvd.nist.gov/vuln/detail/CVE-2024-38519)] [Properly sanitize file-extension to prevent file system modification and RCE](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j) - Unsafe extensions are now blocked from being downloaded #### Core changes - [Add `playlist_channel` and `playlist_channel_id` fields](https://github.com/yt-dlp/yt-dlp/commit/55e3e6fd21e741ec5ae3d8624de5e5ea345810eb) ([#10266](https://github.com/yt-dlp/yt-dlp/issues/10266)) by [bashonly](https://github.com/bashonly) - [Disallow unsafe extensions (CVE-2024-38519)](https://github.com/yt-dlp/yt-dlp/commit/5ce582448ececb8d9c30c8c31f58330090ced03a) by [Grub4K](https://github.com/Grub4K) - **cookies**: [Fix `--cookies-from-browser` DE detection on Linux](https://github.com/yt-dlp/yt-dlp/commit/a8520244b8642880e4d35925e9e49eff94d548de) ([#10237](https://github.com/yt-dlp/yt-dlp/issues/10237)) by [peisenwang](https://github.com/peisenwang) #### Extractor changes - **afreecatv** - [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/e8352ad6599de7b5371dc39a1a1edc7890aaedb4) ([#10174](https://github.com/yt-dlp/yt-dlp/issues/10174)) by [hui1601](https://github.com/hui1601) - catchstory: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/054a3ba7d1293f9fbe21800d62d1e5ddcbded238) ([#10235](https://github.com/yt-dlp/yt-dlp/issues/10235)) by [hui1601](https://github.com/hui1601) - **bilibili**: [Support legacy formats](https://github.com/yt-dlp/yt-dlp/commit/1d6ab17d0752ee9cf19e3e63c7dec7b600d3f228) ([#9117](https://github.com/yt-dlp/yt-dlp/issues/9117)) by [c-basalt](https://github.com/c-basalt), [GD-Slime](https://github.com/GD-Slime) - **bitchute**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/5b1a2aa978d0074cee278e7659f32f52ecc4ab53) ([#10301](https://github.com/yt-dlp/yt-dlp/issues/10301)) by [seproDev](https://github.com/seproDev) - **brightcove**: [Upgrade requests to HTTPS](https://github.com/yt-dlp/yt-dlp/commit/90c3721a322756bb7f4ca10ceb73744500bee37e) ([#10202](https://github.com/yt-dlp/yt-dlp/issues/10202)) by [bashonly](https://github.com/bashonly) - **cloudflarestream**: [Fix `_VALID_URL` and embed extraction](https://github.com/yt-dlp/yt-dlp/commit/7aa322c02cec54eb77154a89da7e400194f0bd03) ([#10215](https://github.com/yt-dlp/yt-dlp/issues/10215)) by [bashonly](https://github.com/bashonly) - **cloudycdn**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/b758877afa225747fba81c8a580e27583a231734) ([#10271](https://github.com/yt-dlp/yt-dlp/issues/10271)) by [Caesim404](https://github.com/Caesim404) - **digitalconcerthall**: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/2a4f2e82dbeeb0c9130883c83dac689d5260c871) ([#10152](https://github.com/yt-dlp/yt-dlp/issues/10152)) by [seproDev](https://github.com/seproDev), [tippfehlr](https://github.com/tippfehlr) - **facebook**: reel: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/8ca1d57ed08d00efa117820a5a82f763b20e2d1d) ([#10232](https://github.com/yt-dlp/yt-dlp/issues/10232)) by [bashonly](https://github.com/bashonly) - **francetv** - [Detect and raise errors for DRM](https://github.com/yt-dlp/yt-dlp/commit/3690c2f59827c79a1bbe388a7c1ae75db7477db2) ([#10165](https://github.com/yt-dlp/yt-dlp/issues/10165)) by 
[bashonly](https://github.com/bashonly) - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/081708d6074dfbb907e25af61ba530bba0d4b31d) ([#10177](https://github.com/yt-dlp/yt-dlp/issues/10177)) by [bashonly](https://github.com/bashonly) - **generic**: [Add `key_query` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/5dbac313ae4e3e8521dfe2e1a6a048a98ff4b4fe) by [bashonly](https://github.com/bashonly) - **graspop**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1d369b4096d79233e0ac2c93762746a64d7a69c8) ([#10268](https://github.com/yt-dlp/yt-dlp/issues/10268)) by [Niluge-KiWi](https://github.com/Niluge-KiWi) - **jiocinema**: series: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/61714f46956f61612032bba857aed7ad1387eccd) ([#10139](https://github.com/yt-dlp/yt-dlp/issues/10139)) by [varunchopra](https://github.com/varunchopra) - **khanacademy**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/4093eb1fcc29a0e2aea9adfcba479787d9ae0c0c) ([#9136](https://github.com/yt-dlp/yt-dlp/issues/9136)) by [c-basalt](https://github.com/c-basalt) - **laracasts**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b8da8a98f897599095d4ef1644b8c5fd39921118) ([#10055](https://github.com/yt-dlp/yt-dlp/issues/10055)) by [ASertacAkkaya](https://github.com/ASertacAkkaya), [seproDev](https://github.com/seproDev) - **matchtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f3411af12e209bc5624e1ac31271b8aabe2d3c90) ([#10190](https://github.com/yt-dlp/yt-dlp/issues/10190)) by [megumintyan](https://github.com/megumintyan) - **mediasite**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/0953209a857c51648aee89d205c086b0e1dd3864) ([#10273](https://github.com/yt-dlp/yt-dlp/issues/10273)) by [bashonly](https://github.com/bashonly) - **microsoftembed**: [Add extractors for dev materials](https://github.com/yt-dlp/yt-dlp/commit/9200bc70c94546b2191bb6fbfc9cea98a919cc56) ([#9177](https://github.com/yt-dlp/yt-dlp/issues/9177)) by [c-basalt](https://github.com/c-basalt) - **mlbtv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/61edf57f8f13f6dfd81154174e647eb5fdd26089) ([#10296](https://github.com/yt-dlp/yt-dlp/issues/10296)) by [bashonly](https://github.com/bashonly) - **neteasemusic**: [Extract more formats from new API](https://github.com/yt-dlp/yt-dlp/commit/7a03f88c40b80d3cf54f68edd9d4bdd6aa527570) ([#10258](https://github.com/yt-dlp/yt-dlp/issues/10258)) by [hafeoz](https://github.com/hafeoz) - **nhkradiru**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b8e2a5e0e1030076f833917906e19bb6c7b318f6) ([#10106](https://github.com/yt-dlp/yt-dlp/issues/10106)) by [garret1317](https://github.com/garret1317) - **nuum**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/aefede25561a06cba398d4f593eee2fbe942693b) ([#10316](https://github.com/yt-dlp/yt-dlp/issues/10316)) by [DmitryScaletta](https://github.com/DmitryScaletta) - **orf** - on - [Add `prefer_segments_playlist` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/e6a22834df1776ec4e486526f6df2bf53cb7e06f) ([#10314](https://github.com/yt-dlp/yt-dlp/issues/10314)) by [seproDev](https://github.com/seproDev) - [Support segmented episodes](https://github.com/yt-dlp/yt-dlp/commit/8b46ad4d8b8ee8c5472af0cde863baa89ca3f425) ([#10053](https://github.com/yt-dlp/yt-dlp/issues/10053)) by [seproDev](https://github.com/seproDev) - **patreoncampaign**: [Fix `campaign_id` extraction](https://github.com/yt-dlp/yt-dlp/commit/2e5a47da400b645aadbda6afd1156bd89c744f48) 
([#10070](https://github.com/yt-dlp/yt-dlp/issues/10070)) by [bashonly](https://github.com/bashonly)
- **podbayfm**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/d4b52ce3fcb8d9578ed12365648eaba8718c603e) ([#10195](https://github.com/yt-dlp/yt-dlp/issues/10195)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
- **pokergo**: [Make metadata extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/36e8dd832579b5375a0f6626af4268b86b4eb21a) ([#10319](https://github.com/yt-dlp/yt-dlp/issues/10319)) by [axpauls](https://github.com/axpauls)
- **qqmusic**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/4f5d7be3c5590bb257d8ff521572aee9839ab754) ([#9768](https://github.com/yt-dlp/yt-dlp/issues/9768)) by [c-basalt](https://github.com/c-basalt)
- **rtvslo.si**: show: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/92a1c4abaeeba9a69d611c57b73555cb1a1f00ad) ([#8418](https://github.com/yt-dlp/yt-dlp/issues/8418)) by [JSubelj](https://github.com/JSubelj), [seproDev](https://github.com/seproDev)
- **soundcloud**: [Fix `download` format extraction](https://github.com/yt-dlp/yt-dlp/commit/e53e56b73543799638fa6abb0c78f8b091aa84e1) ([#10125](https://github.com/yt-dlp/yt-dlp/issues/10125)) by [bashonly](https://github.com/bashonly)
- **sproutvideo**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/d6c2c2bc84f1434255be5c73baeb17d893d2c0d4) ([#10098](https://github.com/yt-dlp/yt-dlp/issues/10098)) by [bashonly](https://github.com/bashonly), [TheZ3ro](https://github.com/TheZ3ro)
- **tiktok**
    - [Detect and raise when login is required](https://github.com/yt-dlp/yt-dlp/commit/ea88129784fcbb6987161df9ba05909325d8e2e9) ([#10124](https://github.com/yt-dlp/yt-dlp/issues/10124)) by [bashonly](https://github.com/bashonly)
    - [Fix API extraction](https://github.com/yt-dlp/yt-dlp/commit/96472d72f29550c25c5dcedcde02c38c192b0011) ([#10216](https://github.com/yt-dlp/yt-dlp/issues/10216)) by [bashonly](https://github.com/bashonly)
- **tubitv**
    - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/bef9a9e5361fd7a72e21d0f1a8c8afb70d89e8c5) ([#9975](https://github.com/yt-dlp/yt-dlp/issues/9975)) by [chilinux](https://github.com/chilinux)
    - series: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/d7d861811c15585a4f7ec9d5ae68d2ac28de28a0) ([#10116](https://github.com/yt-dlp/yt-dlp/issues/10116)) by [bashonly](https://github.com/bashonly)
- **vimeo**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/d4b99a233314bf31f9c842035ea9884673d5313a) ([#10327](https://github.com/yt-dlp/yt-dlp/issues/10327)) by [bashonly](https://github.com/bashonly)
- **youtube**
    - [Extract all formats from multi-language m3u8s](https://github.com/yt-dlp/yt-dlp/commit/9bd85019931927a99b0fe0dc58ac51acca9fbe72) ([#9875](https://github.com/yt-dlp/yt-dlp/issues/9875)) by [bashonly](https://github.com/bashonly), [clienthax](https://github.com/clienthax)
    - [Skip formats if nsig decoding fails](https://github.com/yt-dlp/yt-dlp/commit/800ec085ccf98420584d8bb38c20a2c079669b09) ([#10223](https://github.com/yt-dlp/yt-dlp/issues/10223)) by [bashonly](https://github.com/bashonly)
    - [Suppress "Unavailable videos are hidden" warning](https://github.com/yt-dlp/yt-dlp/commit/24f3097ea9a470a984d0454dc013cafa2325f5f8) ([#10159](https://github.com/yt-dlp/yt-dlp/issues/10159)) by [mgedmin](https://github.com/mgedmin)
    - tab: [Fix channel metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/a0d9967f6822fc279e86bce33464194985148727) ([#10071](https://github.com/yt-dlp/yt-dlp/issues/10071)) by [bashonly](https://github.com/bashonly), [shoxie007](https://github.com/shoxie007)

#### Downloader changes
- **hls**: [Apply `extra_param_to_key_url` from info dict](https://github.com/yt-dlp/yt-dlp/commit/ca8885edd93bdf8912af6c22ee335b6222cb9ba9) by [bashonly](https://github.com/bashonly)

#### Postprocessor changes
- **embedthumbnail**: [Fix postprocessor](https://github.com/yt-dlp/yt-dlp/commit/f2a4ea1794718e4dc0148bc172cb877f1080903b) ([#10248](https://github.com/yt-dlp/yt-dlp/issues/10248)) by [Grub4K](https://github.com/Grub4K)

#### Networking changes
- **Request Handler**: requests: [Bump minimum `requests` version to 2.32.2](https://github.com/yt-dlp/yt-dlp/commit/db50f19d76c6870a5a13d0cab9287d684fd7449a) ([#10079](https://github.com/yt-dlp/yt-dlp/issues/10079)) by [bashonly](https://github.com/bashonly)

#### Misc. changes
- **build**
    - [Bump Pyinstaller to `>=6.7.0` for all builds](https://github.com/yt-dlp/yt-dlp/commit/5fdd13006a1c5d78642c8d3c4c7df0448273c2ae) ([#10069](https://github.com/yt-dlp/yt-dlp/issues/10069)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)
    - [Cache dependencies for `macos` job](https://github.com/yt-dlp/yt-dlp/commit/46c1b7cfec1d0e6155083ca7e6948674c64ecb97) ([#10088](https://github.com/yt-dlp/yt-dlp/issues/10088)) by [bashonly](https://github.com/bashonly)
    - [Use `macos-12` image for `yt-dlp_macos`](https://github.com/yt-dlp/yt-dlp/commit/03334d639d5282cd4107edb32c623ba400262fc4) ([#10063](https://github.com/yt-dlp/yt-dlp/issues/10063)) by [bashonly](https://github.com/bashonly)
- **cleanup**
    - [Add more ruff rules](https://github.com/yt-dlp/yt-dlp/commit/add96eb9f84cfffe85682bf2fb85135746994ee8) ([#10149](https://github.com/yt-dlp/yt-dlp/issues/10149)) by [seproDev](https://github.com/seproDev)
    - [Bump ruff to 0.5.x](https://github.com/yt-dlp/yt-dlp/commit/7814c50948a2b9a4c746441ecbc509ae563d5d1f) ([#10282](https://github.com/yt-dlp/yt-dlp/issues/10282)) by [seproDev](https://github.com/seproDev)
    - Miscellaneous: [6aaf96a](https://github.com/yt-dlp/yt-dlp/commit/6aaf96a3d6e7d0d426e97e11a2fcf52fda00e733) by [bashonly](https://github.com/bashonly), [c-basalt](https://github.com/c-basalt), [jucor](https://github.com/jucor), [seproDev](https://github.com/seproDev)
- **test**: download: [Raise on network errors](https://github.com/yt-dlp/yt-dlp/commit/54a63e80af82791d2f0985bd0176bb182963fd5f) ([#10283](https://github.com/yt-dlp/yt-dlp/issues/10283)) by [bashonly](https://github.com/bashonly), [seproDev](https://github.com/seproDev)

### 2024.05.27

#### Extractor changes
- [Fix parsing of base URL in SMIL manifest](https://github.com/yt-dlp/yt-dlp/commit/26603d0b34898818992bee4598e0607c07059511) ([#9225](https://github.com/yt-dlp/yt-dlp/issues/9225)) by [seproDev](https://github.com/seproDev)
- **peertube**: [Support livestreams](https://github.com/yt-dlp/yt-dlp/commit/12b248ce60be1aa1362edd839d915bba70dbee4b) ([#10044](https://github.com/yt-dlp/yt-dlp/issues/10044)) by [bashonly](https://github.com/bashonly), [trueauracoral](https://github.com/trueauracoral)
- **piksel**: [Update domain](https://github.com/yt-dlp/yt-dlp/commit/ae2194e1dd4a99d32eb3cab7c48a0ff03101ef3b) ([#9223](https://github.com/yt-dlp/yt-dlp/issues/9223)) by [seproDev](https://github.com/seproDev)
- **tiktok**: user: [Fix extraction loop](https://github.com/yt-dlp/yt-dlp/commit/c53c2e40fde8f2e15c7c62f8ca1a5d9e90ddc079) ([#10035](https://github.com/yt-dlp/yt-dlp/issues/10035)) by [bashonly](https://github.com/bashonly)

#### Misc. changes
- **cleanup**: Miscellaneous: [5e3e19c](https://github.com/yt-dlp/yt-dlp/commit/5e3e19c93c52830da98d9d1ed84ea7a559efefbd) by [bashonly](https://github.com/bashonly)

### 2024.05.26

#### Core changes
- [Better warning when requested subs format not found](https://github.com/yt-dlp/yt-dlp/commit/7e4259dff0b681a3f0e8a930799ce0394328c86e) ([#9873](https://github.com/yt-dlp/yt-dlp/issues/9873)) by [DaPotato69](https://github.com/DaPotato69)
- [Merged with youtube-dl a08f2b7](https://github.com/yt-dlp/yt-dlp/commit/a4da9db87b6486b270c15dfa07ab5bfedc83f6bd) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- [Warn if lack of ffmpeg alters format selection](https://github.com/yt-dlp/yt-dlp/commit/96da9525043f78aca4544d01761b13b2140e9ae6) ([#9805](https://github.com/yt-dlp/yt-dlp/issues/9805)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
- **cookies**
    - [Add `--cookies-from-browser` support for Whale](https://github.com/yt-dlp/yt-dlp/commit/dd9ad97b1fbdd36c086b8ba82328a4d954f78f8e) ([#9649](https://github.com/yt-dlp/yt-dlp/issues/9649)) by [roeniss](https://github.com/roeniss)
    - [Get chrome session cookies with `--cookies-from-browser`](https://github.com/yt-dlp/yt-dlp/commit/f1f158976e38d38a260762accafe7bbe6d451151) ([#9747](https://github.com/yt-dlp/yt-dlp/issues/9747)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
- **windows**: [Improve shell quoting and tests](https://github.com/yt-dlp/yt-dlp/commit/64766459e37451b665c1464073c28361fbcf1c25) ([#9802](https://github.com/yt-dlp/yt-dlp/issues/9802)) by [Grub4K](https://github.com/Grub4K) (With fixes in [7e26bd5](https://github.com/yt-dlp/yt-dlp/commit/7e26bd53f9c5893518fde81dfd0079ec08dd841e))

#### Extractor changes
- [Add POST data hash to `--write-pages` filenames](https://github.com/yt-dlp/yt-dlp/commit/61b17437dc14a1c7e90ff48a6198df77828c6df4) ([#9879](https://github.com/yt-dlp/yt-dlp/issues/9879)) by [minamotorin](https://github.com/minamotorin) (With fixes in [c999bac](https://github.com/yt-dlp/yt-dlp/commit/c999bac02c5a4f755b2a82488a975e91c988ffd8) by [bashonly](https://github.com/bashonly))
- [Make `_search_nextjs_data` non fatal](https://github.com/yt-dlp/yt-dlp/commit/3ee1194288981c4f2c4abd8315326de0c424d2ce) ([#8937](https://github.com/yt-dlp/yt-dlp/issues/8937)) by [Grub4K](https://github.com/Grub4K)
- **afreecatv**: live: [Add `cdn` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/315b3544296bb83012e20ee3af9d3cbf5600dd1c) ([#9666](https://github.com/yt-dlp/yt-dlp/issues/9666)) by [bashonly](https://github.com/bashonly)
- **alura**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/fc2879ecb05aaad36869609d154e4321362c1f63) ([#9658](https://github.com/yt-dlp/yt-dlp/issues/9658)) by [hugohaa](https://github.com/hugohaa)
- **artetv**: [Label forced subtitles](https://github.com/yt-dlp/yt-dlp/commit/7b5674949fd03a33b47b67b31d56a5adf1c48c91) ([#9945](https://github.com/yt-dlp/yt-dlp/issues/9945)) by [vtexier](https://github.com/vtexier)
- **bbc**: [Fix and extend extraction](https://github.com/yt-dlp/yt-dlp/commit/7975ddf245d22af034d5b983eeb1c5ec6c2ce053) ([#9705](https://github.com/yt-dlp/yt-dlp/issues/9705)) by [dirkf](https://github.com/dirkf), [kylegustavo](https://github.com/kylegustavo), [pukkandan](https://github.com/pukkandan)
- **bilibili**: [Fix `--geo-verification-proxy` support](https://github.com/yt-dlp/yt-dlp/commit/2338827072dacab0f15348b70aec8685feefc8d1) ([#9817](https://github.com/yt-dlp/yt-dlp/issues/9817)) by [fireattack](https://github.com/fireattack)
- **bilibilispacevideo**
    - [Better error message](https://github.com/yt-dlp/yt-dlp/commit/06d52c87314e0bbc16c43c405090843885577b88) ([#9839](https://github.com/yt-dlp/yt-dlp/issues/9839)) by [fireattack](https://github.com/fireattack)
    - [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/4cc99d7b6cce8b39506ead01407445d576b63ee4) ([#9905](https://github.com/yt-dlp/yt-dlp/issues/9905)) by [c-basalt](https://github.com/c-basalt)
- **boosty**: [Add cookies support](https://github.com/yt-dlp/yt-dlp/commit/145dc6f6563e80d2da1b3e9aea2ffa795b71622c) ([#9522](https://github.com/yt-dlp/yt-dlp/issues/9522)) by [RasmusAntons](https://github.com/RasmusAntons)
- **brilliantpala**: [Fix login](https://github.com/yt-dlp/yt-dlp/commit/eead3bbc01f6529862bdad1f0b2adeabda4f006e) ([#9788](https://github.com/yt-dlp/yt-dlp/issues/9788)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **canalalpha**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/00a9f2e1f7fa69499221f2e8dd73a08efeef79bc) ([#9675](https://github.com/yt-dlp/yt-dlp/issues/9675)) by [kclauhk](https://github.com/kclauhk)
- **cbc.ca**: player: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c8bf48f3a8fa29587e7c73ef5a7710385a5ea725) ([#9866](https://github.com/yt-dlp/yt-dlp/issues/9866)) by [carusocr](https://github.com/carusocr)
- **cda**: [Fix age-gated web extraction](https://github.com/yt-dlp/yt-dlp/commit/6d8a53d870ff6795f509085bfbf3981417999038) ([#9939](https://github.com/yt-dlp/yt-dlp/issues/9939)) by [dirkf](https://github.com/dirkf), [emqi](https://github.com/emqi), [Podiumnoche](https://github.com/Podiumnoche), [Szpachlarz](https://github.com/Szpachlarz)
- **commonmistakes**: [Raise error on blob URLs](https://github.com/yt-dlp/yt-dlp/commit/98d71d8c5e5dab08b561ee6f137e968d2a004262) ([#9897](https://github.com/yt-dlp/yt-dlp/issues/9897)) by [seproDev](https://github.com/seproDev)
- **crunchyroll**
    - [Always make metadata available](https://github.com/yt-dlp/yt-dlp/commit/cb2fb4a643949322adba561ca73bcba3221ec0c5) ([#9772](https://github.com/yt-dlp/yt-dlp/issues/9772)) by [bashonly](https://github.com/bashonly)
    - [Fix auth and remove cookies support](https://github.com/yt-dlp/yt-dlp/commit/ff38a011d57b763f3a69bebd25a5dc9044a717ce) ([#9749](https://github.com/yt-dlp/yt-dlp/issues/9749)) by [bashonly](https://github.com/bashonly)
    - [Fix stream extraction](https://github.com/yt-dlp/yt-dlp/commit/f2816634e3be88fe158b342ee33918de3c272a54) ([#10005](https://github.com/yt-dlp/yt-dlp/issues/10005)) by [bashonly](https://github.com/bashonly)
    - [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/5904853ae5788509fdc4892cb7ecdfa9ae7f78e6) ([#9857](https://github.com/yt-dlp/yt-dlp/issues/9857)) by [bashonly](https://github.com/bashonly)
- **dangalplay**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/0d067e77c3f5527946fb0c22ee1c7011994cba40) ([#10021](https://github.com/yt-dlp/yt-dlp/issues/10021)) by [bashonly](https://github.com/bashonly)
- **discoveryplus**: [Fix dmax.de and related extractors](https://github.com/yt-dlp/yt-dlp/commit/90d2da311bbb5dc06f385ee428c7e4590936e995) ([#10020](https://github.com/yt-dlp/yt-dlp/issues/10020)) by [bashonly](https://github.com/bashonly)
- **eplus**: [Handle URLs without videos](https://github.com/yt-dlp/yt-dlp/commit/351dc0bc334c4e1b5f00c152818c3ec0ed71f788) ([#9855](https://github.com/yt-dlp/yt-dlp/issues/9855)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **europarlwebstream**: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/800a43983e5fb719526ce4cb3956216085c63268) ([#9647](https://github.com/yt-dlp/yt-dlp/issues/9647)) by [seproDev](https://github.com/seproDev), [voidful](https://github.com/voidful)
- **facebook**: [Fix DASH formats extraction](https://github.com/yt-dlp/yt-dlp/commit/e3b42d8b1b8bcfff7ba146c19fc3f6f6ba843cea) ([#9734](https://github.com/yt-dlp/yt-dlp/issues/9734)) by [bashonly](https://github.com/bashonly)
- **godresource**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/65e709d23530959075816e966c42179ad46e8e3b) ([#9629](https://github.com/yt-dlp/yt-dlp/issues/9629)) by [HobbyistDev](https://github.com/HobbyistDev)
- **googledrive**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/85ec2a337ac325cf6427cbafd56f0a034c1a5218) ([#9908](https://github.com/yt-dlp/yt-dlp/issues/9908)) by [WyohKnott](https://github.com/WyohKnott)
- **hearthisat**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/5bbfdb7c999b22f1aeca0c3489c167d6eb73013b) ([#9949](https://github.com/yt-dlp/yt-dlp/issues/9949)) by [bohwaz](https://github.com/bohwaz), [seproDev](https://github.com/seproDev)
- **hytale**: [Use `CloudflareStreamIE` explicitly](https://github.com/yt-dlp/yt-dlp/commit/31b417e1d1ccc67d5c027bf8878f483dc34cb118) ([#9672](https://github.com/yt-dlp/yt-dlp/issues/9672)) by [llamasblade](https://github.com/llamasblade)
- **instagram**: [Support `/reels/` URLs](https://github.com/yt-dlp/yt-dlp/commit/06cb0638392b607b47d3c2ac48eb2ebecb0f060d) ([#9539](https://github.com/yt-dlp/yt-dlp/issues/9539)) by [amir16yp](https://github.com/amir16yp)
- **jiocinema**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/1463945ae5fb05986a0bd1aa02e41d1a08d93a02) ([#10026](https://github.com/yt-dlp/yt-dlp/issues/10026)) by [bashonly](https://github.com/bashonly)
- **jiosaavn**: [Extract via API and fix playlists](https://github.com/yt-dlp/yt-dlp/commit/0c21c53885cf03f4040467ae8c44d7ff51016116) ([#9656](https://github.com/yt-dlp/yt-dlp/issues/9656)) by [bashonly](https://github.com/bashonly)
- **lci**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5a2eebc76770fca91ffabeff658d560f716fec80) ([#10025](https://github.com/yt-dlp/yt-dlp/issues/10025)) by [ocococococ](https://github.com/ocococococ)
- **mixch**: [Extract comments](https://github.com/yt-dlp/yt-dlp/commit/b38018b781b062d5169d104ab430489aef8e7f1e) ([#9860](https://github.com/yt-dlp/yt-dlp/issues/9860)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **moviepilot**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/296df0da1d38a44d34c99b60a18066c301774537) ([#9366](https://github.com/yt-dlp/yt-dlp/issues/9366)) by [panatexxa](https://github.com/panatexxa)
- **netease**: program: [Improve `--no-playlist` message](https://github.com/yt-dlp/yt-dlp/commit/73f12119b52d98281804b0c072b2ed6aa841ec88) ([#9488](https://github.com/yt-dlp/yt-dlp/issues/9488)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **nfb**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/0a1a8e3005f66c44bf67633dccd4df19c3fccd1a) ([#9650](https://github.com/yt-dlp/yt-dlp/issues/9650)) by [rrgomes](https://github.com/rrgomes)
- **ntslive**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/be7db1a5a8c483726c511c30ea4689cbb8b27962) ([#9641](https://github.com/yt-dlp/yt-dlp/issues/9641)) by [lostfictions](https://github.com/lostfictions)
- **orf**: on: [Improve extraction](https://github.com/yt-dlp/yt-dlp/commit/0dd53faeca2ba0ce138e4092d07b5f2dbf2422f9) ([#9677](https://github.com/yt-dlp/yt-dlp/issues/9677)) by [TuxCoder](https://github.com/TuxCoder)
- **orftvthek**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/3779f2a307ba3ef1d28e107cdd71b221dfb4eb36) ([#10011](https://github.com/yt-dlp/yt-dlp/issues/10011)) by [seproDev](https://github.com/seproDev)
- **patreon**
    - [Extract multiple embeds](https://github.com/yt-dlp/yt-dlp/commit/036e0d92c6052465673d459678322ea03e61483d) ([#9850](https://github.com/yt-dlp/yt-dlp/issues/9850)) by [bashonly](https://github.com/bashonly)
    - [Fix Vimeo embed extraction](https://github.com/yt-dlp/yt-dlp/commit/c9ce57d9bf51541da2381d99bc096a9d0ddf1f27) ([#9712](https://github.com/yt-dlp/yt-dlp/issues/9712)) by [bashonly](https://github.com/bashonly)
- **piapro**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3ba8de62d61d782256f5c1e9939a0762039657de) ([#9311](https://github.com/yt-dlp/yt-dlp/issues/9311)) by [FinnRG](https://github.com/FinnRG), [seproDev](https://github.com/seproDev)
- **pornhub**: [Fix login by email address](https://github.com/yt-dlp/yt-dlp/commit/518c1afc1592cae3e4eb39dc646b5bc059333112) ([#9914](https://github.com/yt-dlp/yt-dlp/issues/9914)) by [feederbox826](https://github.com/feederbox826)
- **qub**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6b54cccdcb892bca3e55993480d8b86f1c7e6da6) ([#7019](https://github.com/yt-dlp/yt-dlp/issues/7019)) by [alexhuot1](https://github.com/alexhuot1), [dirkf](https://github.com/dirkf)
- **reddit**: [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/82f4f4444e26daf35b7302c406fe2312f78f619e) ([#10006](https://github.com/yt-dlp/yt-dlp/issues/10006)) by [kclauhk](https://github.com/kclauhk)
- **soundcloud**
    - [Add `formats` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/beaf832c7a9d57833f365ce18f6115b88071b296) ([#10004](https://github.com/yt-dlp/yt-dlp/issues/10004)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
    - [Extract `genres`](https://github.com/yt-dlp/yt-dlp/commit/231c2eacc41b06b65c63edf94c0d04768a5da607) ([#9821](https://github.com/yt-dlp/yt-dlp/issues/9821)) by [bashonly](https://github.com/bashonly)
- **taptap**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/63b569bc5e7d461753637a20ad84a575adee4c0a) ([#9776](https://github.com/yt-dlp/yt-dlp/issues/9776)) by [c-basalt](https://github.com/c-basalt)
- **tele5**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/c92e4e625e9e6bbbbf8e3b20c3e7ebe57c16072d) ([#10024](https://github.com/yt-dlp/yt-dlp/issues/10024)) by [bashonly](https://github.com/bashonly)
- **theatercomplextown**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/8056a3026ed6ec6a6d0ed56fdd7ebcd16e928341) ([#9754](https://github.com/yt-dlp/yt-dlp/issues/9754)) by [bashonly](https://github.com/bashonly)
- **tiktok**
    - [Add `device_id` extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/3584b8390bd21c0393a3079eeee71aed56a1c1d8) ([#9951](https://github.com/yt-dlp/yt-dlp/issues/9951)) by [bashonly](https://github.com/bashonly)
    - [Extract all web formats](https://github.com/yt-dlp/yt-dlp/commit/4ccd73fea0f6f4be343e1ec7f22dd03799addcf8) ([#9960](https://github.com/yt-dlp/yt-dlp/issues/9960)) by [bashonly](https://github.com/bashonly)
    - [Extract via mobile API only if extractor-arg is passed](https://github.com/yt-dlp/yt-dlp/commit/41ba4a808b597a3afed78c89675a30deb6844450) ([#9938](https://github.com/yt-dlp/yt-dlp/issues/9938)) by [bashonly](https://github.com/bashonly)
    - [Fix subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/eef1e9f44ff14c5e65b759bb1eafa3946cdaf719) ([#9961](https://github.com/yt-dlp/yt-dlp/issues/9961)) by [bashonly](https://github.com/bashonly)
    - collection: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/119d41f27061d220d276a2d38cfc8d873437452a) ([#9986](https://github.com/yt-dlp/yt-dlp/issues/9986)) by [bashonly](https://github.com/bashonly), [imanoreotwe](https://github.com/imanoreotwe)
    - user: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/347f13dd9bccc2b4db3ea25689410d45d8370ed4) ([#9661](https://github.com/yt-dlp/yt-dlp/issues/9661)) by [bashonly](https://github.com/bashonly)
- **tv5monde**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6db96268c521e945d42649607db1574f5d92e082) ([#9143](https://github.com/yt-dlp/yt-dlp/issues/9143)) by [alard](https://github.com/alard), [seproDev](https://github.com/seproDev)
- **twitter**
    - [Fix auth for x.com migration](https://github.com/yt-dlp/yt-dlp/commit/3e35aa32c74bc108375be8c8b6b3bfc90dfff1b4) ([#9952](https://github.com/yt-dlp/yt-dlp/issues/9952)) by [bashonly](https://github.com/bashonly)
    - [Support x.com URLs](https://github.com/yt-dlp/yt-dlp/commit/4813173e4544f125d6f2afc31e600727d761b8dd) ([#9926](https://github.com/yt-dlp/yt-dlp/issues/9926)) by [bashonly](https://github.com/bashonly)
- **vk**: [Improve format extraction](https://github.com/yt-dlp/yt-dlp/commit/df5c9e733aaba703cf285c0372b6d61629330c82) ([#9885](https://github.com/yt-dlp/yt-dlp/issues/9885)) by [seproDev](https://github.com/seproDev)
- **wrestleuniverse**: [Avoid partial stream formats](https://github.com/yt-dlp/yt-dlp/commit/c4853655cb9a793129280806af643de43c48f4d5) ([#9800](https://github.com/yt-dlp/yt-dlp/issues/9800)) by [bashonly](https://github.com/bashonly)
- **xiaohongshu**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a2e9031605d87c469be9ce98dbbdf4960b727338) ([#9646](https://github.com/yt-dlp/yt-dlp/issues/9646)) by [HobbyistDev](https://github.com/HobbyistDev)
- **xvideos**: quickies: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b207d26f83fb8ab0ce56df74dff43ff583a3264f) ([#9834](https://github.com/yt-dlp/yt-dlp/issues/9834)) by [JakeFinley96](https://github.com/JakeFinley96)
- **youporn**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/351368cb9a6731b886a58f5a10fd6b302bbe47be) ([#8827](https://github.com/yt-dlp/yt-dlp/issues/8827)) by [The-MAGI](https://github.com/The-MAGI)
- **youtube**
    - [Add `mediaconnect` client](https://github.com/yt-dlp/yt-dlp/commit/cf212d0a331aba05c32117573f760cdf3af8c62f) ([#9546](https://github.com/yt-dlp/yt-dlp/issues/9546)) by [clienthax](https://github.com/clienthax)
    - [Extract upload timestamp if available](https://github.com/yt-dlp/yt-dlp/commit/96a134dea6397a5f2131947c427aac52c8b4e677) ([#9856](https://github.com/yt-dlp/yt-dlp/issues/9856)) by [coletdjnz](https://github.com/coletdjnz)
    - [Fix comments extraction](https://github.com/yt-dlp/yt-dlp/commit/8e15177b4113c355989881e4e030f695a9b59c3a) ([#9775](https://github.com/yt-dlp/yt-dlp/issues/9775)) by [bbilly1](https://github.com/bbilly1), [jakeogh](https://github.com/jakeogh), [minamotorin](https://github.com/minamotorin), [shoxie007](https://github.com/shoxie007)
    - [Remove `android` from default clients](https://github.com/yt-dlp/yt-dlp/commit/12d8ea8246fa901de302ff5cc748caddadc82f41) ([#9553](https://github.com/yt-dlp/yt-dlp/issues/9553)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)
- **zenyandex**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c4b87dd885ee5391e5f481e7c8bd550a7c543623) ([#9813](https://github.com/yt-dlp/yt-dlp/issues/9813)) by [src-tinkerer](https://github.com/src-tinkerer)

#### Networking changes
- [Add `extensions` attribute to `Response`](https://github.com/yt-dlp/yt-dlp/commit/bec9a59e8ec82c18e3bf9268eaa436793dd52e35) ([#9756](https://github.com/yt-dlp/yt-dlp/issues/9756)) by [bashonly](https://github.com/bashonly)
- **Request Handler**
    - requests
        - [Patch support for `requests` 2.32.2+](https://github.com/yt-dlp/yt-dlp/commit/3f7999533ebe41c2a579d91b4e4cb211cfcd3bc0) ([#9992](https://github.com/yt-dlp/yt-dlp/issues/9992)) by [Grub4K](https://github.com/Grub4K)
        - [Update to `requests` 2.32.0](https://github.com/yt-dlp/yt-dlp/commit/c36513f1be2ef3d3cec864accbffda1afaa06ffd) ([#9980](https://github.com/yt-dlp/yt-dlp/issues/9980)) by [coletdjnz](https://github.com/coletdjnz)

#### Misc. changes
- [Add `hatch`, `ruff`, `pre-commit` and improve dev docs](https://github.com/yt-dlp/yt-dlp/commit/e897bd8292a41999cf51dba91b390db5643c72db) ([#7409](https://github.com/yt-dlp/yt-dlp/issues/7409)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
- **build**
    - [Migrate `linux_exe` to static musl builds](https://github.com/yt-dlp/yt-dlp/commit/ac817bc83efd939dca3e40c4b527d0ccfc77172b) ([#9811](https://github.com/yt-dlp/yt-dlp/issues/9811)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
    - [Normalize `curl_cffi` group to `curl-cffi`](https://github.com/yt-dlp/yt-dlp/commit/02483bea1c4dbe1bace8ca4d19700104fbb8a00f) ([#9698](https://github.com/yt-dlp/yt-dlp/issues/9698)) by [bashonly](https://github.com/bashonly) (With fixes in [89f535e](https://github.com/yt-dlp/yt-dlp/commit/89f535e2656964b4061c25a7739d4d6ba0a30568))
    - [Run `macos_legacy` job on `macos-12`](https://github.com/yt-dlp/yt-dlp/commit/1a366403d9c26b992faa77e00f4d02ead57559e3) ([#9804](https://github.com/yt-dlp/yt-dlp/issues/9804)) by [bashonly](https://github.com/bashonly)
    - [`macos` job requires `setuptools<70`](https://github.com/yt-dlp/yt-dlp/commit/78c57cc0e0998b8ed90e4306f410aa4be4115cd7) ([#9993](https://github.com/yt-dlp/yt-dlp/issues/9993)) by [bashonly](https://github.com/bashonly)
- **cleanup**
    - [Remove questionable extractors](https://github.com/yt-dlp/yt-dlp/commit/01395a34345d1c6ba1b73ca92f94dd200dc45341) ([#9911](https://github.com/yt-dlp/yt-dlp/issues/9911)) by [seproDev](https://github.com/seproDev)
    - Miscellaneous: [5c019f6](https://github.com/yt-dlp/yt-dlp/commit/5c019f6328ad40d66561eac3c4de0b3cd070d0f6), [ae2af11](https://github.com/yt-dlp/yt-dlp/commit/ae2af1104f80caf2f47544763a33db2c17a3e1de) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
- **test**
    - [Add HTTP proxy tests](https://github.com/yt-dlp/yt-dlp/commit/3c7a287e281d9f9a353dce8902ff78a84c24a040) ([#9578](https://github.com/yt-dlp/yt-dlp/issues/9578)) by [coletdjnz](https://github.com/coletdjnz)
    - [Fix connect timeout test](https://github.com/yt-dlp/yt-dlp/commit/53b4d44f55cca66ac33dab092ef2a30b1164b684) ([#9906](https://github.com/yt-dlp/yt-dlp/issues/9906)) by [coletdjnz](https://github.com/coletdjnz)

### 2024.04.09

#### Important changes
- Security: [[CVE-2024-22423](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-22423)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p)
    - The shell escape function now properly escapes `%`, `\` and `\n`.
    - `utils.Popen` has been patched accordingly.

#### Core changes
- [Add new option `--progress-delta`](https://github.com/yt-dlp/yt-dlp/commit/9590cc6b4768e190183d7d071a6c78170889116a) ([#9082](https://github.com/yt-dlp/yt-dlp/issues/9082)) by [Grub4K](https://github.com/Grub4K)
- [Add new options `--impersonate` and `--list-impersonate-targets`](https://github.com/yt-dlp/yt-dlp/commit/0b81d4d252bd065ccd352722987ea34fe17f9244) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
- [Add option `--no-break-on-existing`](https://github.com/yt-dlp/yt-dlp/commit/16be117729150b2784f3b17755c886cb0cf73374) ([#9610](https://github.com/yt-dlp/yt-dlp/issues/9610)) by [bashonly](https://github.com/bashonly)
- [Fix `filesize_approx` calculation](https://github.com/yt-dlp/yt-dlp/commit/86e3b82261e8ebc6c6707c09544c9dfb8907c0fd) ([#9560](https://github.com/yt-dlp/yt-dlp/issues/9560)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
- [Infer `acodec` for single-codec containers](https://github.com/yt-dlp/yt-dlp/commit/86a972033e05fea80e5fe7f2aff6723dbe2f3952) by [pukkandan](https://github.com/pukkandan)
- [Prevent RCE when using `--exec` with `%q` (CVE-2024-22423)](https://github.com/yt-dlp/yt-dlp/commit/ff07792676f404ffff6ee61b5638c9dc1a33a37a) by [Grub4K](https://github.com/Grub4K)
- **cookies**: [Add `--cookies-from-browser` support for Firefox Flatpak](https://github.com/yt-dlp/yt-dlp/commit/2ab2651a4a7be18939e2b4cb21be79fe477c797a) ([#9619](https://github.com/yt-dlp/yt-dlp/issues/9619)) by [un-def](https://github.com/un-def)
- **utils**
    - `traverse_obj`
        - [Allow unbranching using `all` and `any`](https://github.com/yt-dlp/yt-dlp/commit/3699eeb67cad333272b14a42dd3843d93fda1a2e) ([#9571](https://github.com/yt-dlp/yt-dlp/issues/9571)) by [Grub4K](https://github.com/Grub4K)
        - [Convenience improvements](https://github.com/yt-dlp/yt-dlp/commit/32abfb00bdbd119ca675fdc6d1719331f0a2741a) ([#9577](https://github.com/yt-dlp/yt-dlp/issues/9577)) by [Grub4K](https://github.com/Grub4K)

#### Extractor changes
- [Add extractor impersonate API](https://github.com/yt-dlp/yt-dlp/commit/50c29352312f5662acf9a64b0012766f5c40af61) ([#9474](https://github.com/yt-dlp/yt-dlp/issues/9474)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
- **afreecatv**
    - [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/9415f1a5ef88482ebafe3083e8bcb778ac512df7) ([#9566](https://github.com/yt-dlp/yt-dlp/issues/9566)) by [bashonly](https://github.com/bashonly), [Tomoka1](https://github.com/Tomoka1)
    - live: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9073ae6458f4c6a832aa832c67174c61852869be) ([#9348](https://github.com/yt-dlp/yt-dlp/issues/9348)) by [hui1601](https://github.com/hui1601)
- **asobistage**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/0284f1fee202302a78888420f933deae19d9f4e1) ([#8735](https://github.com/yt-dlp/yt-dlp/issues/8735)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **box**: [Support URLs without file IDs](https://github.com/yt-dlp/yt-dlp/commit/07f5b2f7570fd9ac85aed17f4c0118f6eac77beb) ([#9504](https://github.com/yt-dlp/yt-dlp/issues/9504)) by [shreyasminocha](https://github.com/shreyasminocha)
- **cbc.ca**: player: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/b49d5ffc53a72d8245ba319ff07bdc5b8c6a4f0c) ([#9561](https://github.com/yt-dlp/yt-dlp/issues/9561)) by [trainman261](https://github.com/trainman261)
- **crunchyroll**
    - [Extract `vo_adaptive_hls` formats by default](https://github.com/yt-dlp/yt-dlp/commit/be77923ffe842f667971019460f6005f3cad01eb) ([#9447](https://github.com/yt-dlp/yt-dlp/issues/9447)) by [bashonly](https://github.com/bashonly)
    - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/954e57e405f79188450eb30103a9308732cd318f) ([#9615](https://github.com/yt-dlp/yt-dlp/issues/9615)) by [bytedream](https://github.com/bytedream)
- **dropbox**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/a48cc86d6f6b20427553620c2ddb990ede6a4b41) ([#9627](https://github.com/yt-dlp/yt-dlp/issues/9627)) by [bashonly](https://github.com/bashonly)
- **fathom**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/bc2b8c0596fd6b75af24822c4f0f1da6783d71f7) ([#9495](https://github.com/yt-dlp/yt-dlp/issues/9495)) by [src-tinkerer](https://github.com/src-tinkerer)
- **gofile**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0da66980d3193cad3dae0120cddddbfcabddf7a1) ([#9446](https://github.com/yt-dlp/yt-dlp/issues/9446)) by [jazz1611](https://github.com/jazz1611)
- **imgur**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/86d2f4d24849af0d1f3af7c0e2ac43bf8a058f74) ([#9471](https://github.com/yt-dlp/yt-dlp/issues/9471)) by [trwstin](https://github.com/trwstin)
- **jiosaavn**
    - [Extract artists](https://github.com/yt-dlp/yt-dlp/commit/0ae16ceb1846cc4e609b70ce7c5d8e7458efceb2) ([#9612](https://github.com/yt-dlp/yt-dlp/issues/9612)) by [bashonly](https://github.com/bashonly)
    - [Fix format extensions](https://github.com/yt-dlp/yt-dlp/commit/443e206ec41e64ca2aef61d8ef91640fb69b3113) ([#9609](https://github.com/yt-dlp/yt-dlp/issues/9609)) by [bashonly](https://github.com/bashonly)
    - [Support playlists](https://github.com/yt-dlp/yt-dlp/commit/2e94602f241f6e41bdc48576c61089435529339b) ([#9622](https://github.com/yt-dlp/yt-dlp/issues/9622)) by [bashonly](https://github.com/bashonly)
- **joqrag**: [Fix live status detection](https://github.com/yt-dlp/yt-dlp/commit/f2fd449b46c4058222e1744f7a35caa20b2d003d) ([#9624](https://github.com/yt-dlp/yt-dlp/issues/9624)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **kick**: [Support browser impersonation](https://github.com/yt-dlp/yt-dlp/commit/c8a61a910096c77ce08dad5e1b2fbda5eb964156) ([#9611](https://github.com/yt-dlp/yt-dlp/issues/9611)) by [bashonly](https://github.com/bashonly)
- **loom**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/f859ed3ba1e8b129ae6a467592c65687e73fbca1) ([#8686](https://github.com/yt-dlp/yt-dlp/issues/8686)) by [bashonly](https://github.com/bashonly), [hruzgar](https://github.com/hruzgar)
- **medici**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4cd9e251b9abada107b10830de997bf4d79ca369) ([#9518](https://github.com/yt-dlp/yt-dlp/issues/9518)) by [Offert4324](https://github.com/Offert4324)
- **mixch**
    - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4c3b7a0769706f7f0ea24adf1f219d5ae82d2b07) ([#9608](https://github.com/yt-dlp/yt-dlp/issues/9608)) by [bashonly](https://github.com/bashonly), [nipotan](https://github.com/nipotan)
    - archive: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c59de48e2bb4c681b03b93b584a05f52609ce4a0) ([#8761](https://github.com/yt-dlp/yt-dlp/issues/8761)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **nhk**: [Fix NHK World extractors](https://github.com/yt-dlp/yt-dlp/commit/4af9d5c2f6aa81403ae2a8a5ae3cc824730f0b86) ([#9623](https://github.com/yt-dlp/yt-dlp/issues/9623)) by [bashonly](https://github.com/bashonly)
- **patreon**: [Do not extract dead embed URLs](https://github.com/yt-dlp/yt-dlp/commit/36b240f9a72af57eb2c9d927ebb7fd1c917ebf18) ([#9613](https://github.com/yt-dlp/yt-dlp/issues/9613)) by [johnvictorfs](https://github.com/johnvictorfs)
- **radio1be**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/36baaa10e06715ccba06b78885b2042c4844c826) ([#9122](https://github.com/yt-dlp/yt-dlp/issues/9122)) by [HobbyistDev](https://github.com/HobbyistDev)
- **sharepoint**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ff349ff94aae0b2b148bd3670f7c91d39c2f1d8e) ([#6531](https://github.com/yt-dlp/yt-dlp/issues/6531)) by [bashonly](https://github.com/bashonly), [C0D3D3V](https://github.com/C0D3D3V)
- **sonylivseries**: [Fix season extraction](https://github.com/yt-dlp/yt-dlp/commit/f2868b26e917354203f82a370ad2396646edb813) ([#9423](https://github.com/yt-dlp/yt-dlp/issues/9423)) by [bashonly](https://github.com/bashonly)
- **soundcloud**
    - [Adjust format sorting](https://github.com/yt-dlp/yt-dlp/commit/a2d0840739cddd585d24e0ce4796394fc8a4fa2e) ([#9584](https://github.com/yt-dlp/yt-dlp/issues/9584)) by [bashonly](https://github.com/bashonly)
    - [Support cookies](https://github.com/yt-dlp/yt-dlp/commit/97362712a1f2b04e735bdf54f749ad99165a62fe) ([#9586](https://github.com/yt-dlp/yt-dlp/issues/9586)) by [bashonly](https://github.com/bashonly)
    - [Support retries for API rate-limit](https://github.com/yt-dlp/yt-dlp/commit/246571ae1d867df8bf31a056bdf3bbbfd398366a) ([#9585](https://github.com/yt-dlp/yt-dlp/issues/9585)) by [bashonly](https://github.com/bashonly)
- **thisoldhouse**: [Support Brightcove embeds](https://github.com/yt-dlp/yt-dlp/commit/0df63cce69026d2f4c0cbb4dd36163e83eac93dc) ([#9576](https://github.com/yt-dlp/yt-dlp/issues/9576)) by [bashonly](https://github.com/bashonly)
- **tiktok**
    - [Fix API extraction](https://github.com/yt-dlp/yt-dlp/commit/cb61e20c266facabb7a30f9ce53bd79dfc158475) ([#9548](https://github.com/yt-dlp/yt-dlp/issues/9548)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
    - [Prefer non-bytevc2 formats](https://github.com/yt-dlp/yt-dlp/commit/63f685f341f35f6f02b0368d1ba53bdb5b520410) ([#9575](https://github.com/yt-dlp/yt-dlp/issues/9575)) by [bashonly](https://github.com/bashonly)
    - [Restore `carrier_region` API parameter](https://github.com/yt-dlp/yt-dlp/commit/fc53ec13ff1ee926a3e533a68cfca8acc887b661) ([#9637](https://github.com/yt-dlp/yt-dlp/issues/9637)) by [bashonly](https://github.com/bashonly)
    - [Update API hostname](https://github.com/yt-dlp/yt-dlp/commit/8c05b3ebae23c5b444857549a85b84004c01a536) ([#9444](https://github.com/yt-dlp/yt-dlp/issues/9444)) by [bashonly](https://github.com/bashonly)
- **twitch**: [Extract AV1 and HEVC formats](https://github.com/yt-dlp/yt-dlp/commit/02f93ff51b3ff9436d60c4993562b366eaae8851) ([#9158](https://github.com/yt-dlp/yt-dlp/issues/9158)) by [kasper93](https://github.com/kasper93)
- **vkplay**: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/b15b0c1d2106437ec61a5c436c543e8760eac160) ([#9636](https://github.com/yt-dlp/yt-dlp/issues/9636)) by [bashonly](https://github.com/bashonly)
- **xvideos**: [Support new URL format](https://github.com/yt-dlp/yt-dlp/commit/aa7e9ae4f48276bd5d0173966c77db9484f65a0a) ([#9502](https://github.com/yt-dlp/yt-dlp/issues/9502)) by [sta1us](https://github.com/sta1us)
- **youtube**
    - [Calculate more accurate `filesize`](https://github.com/yt-dlp/yt-dlp/commit/a25a424323267e3f6f9f63c0b62df499bd7b8d46) by [pukkandan](https://github.com/pukkandan)
    - [Update `android` params](https://github.com/yt-dlp/yt-dlp/commit/e7b17fce14775bd2448695c8eb7379b8d31d3537) by [pukkandan](https://github.com/pukkandan)
    - search: [Fix params for uncensored results](https://github.com/yt-dlp/yt-dlp/commit/17d248a58781e2588d18a5ebe00c441d10011fcd) ([#9456](https://github.com/yt-dlp/yt-dlp/issues/9456)) by [alb](https://github.com/alb), [pukkandan](https://github.com/pukkandan)

#### Downloader changes
- **ffmpeg**: [Accept output args from info dict](https://github.com/yt-dlp/yt-dlp/commit/9c42b7eef547e826e9fcc7beb6706a2523949d05) ([#9278](https://github.com/yt-dlp/yt-dlp/issues/9278)) by [bashonly](https://github.com/bashonly)

#### Networking changes
- [Respect `SSLKEYLOGFILE` environment variable](https://github.com/yt-dlp/yt-dlp/commit/79a451e5763eda8b10d00684d5d3378f3255ee01) ([#9543](https://github.com/yt-dlp/yt-dlp/issues/9543)) by [luiso1979](https://github.com/luiso1979)
- **Request Handler**
    - curlcffi: [Add support for `curl_cffi`](https://github.com/yt-dlp/yt-dlp/commit/52f5be1f1e0dc45bb397ab950f564721976a39bf) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
    - websockets: [Workaround race condition causing issues on PyPy](https://github.com/yt-dlp/yt-dlp/commit/e5d4f11104ce7ea1717a90eea82c0f7d230ea5d5) ([#9514](https://github.com/yt-dlp/yt-dlp/issues/9514)) by [coletdjnz](https://github.com/coletdjnz)

#### Misc. changes
- **build**
    - [Do not include `curl_cffi` in `macos_legacy`](https://github.com/yt-dlp/yt-dlp/commit/b19ae095fdddd43c2a2c67d10fbe0d9a645bb98f) ([#9653](https://github.com/yt-dlp/yt-dlp/issues/9653)) by [bashonly](https://github.com/bashonly)
    - [Optional dependencies cleanup](https://github.com/yt-dlp/yt-dlp/commit/58dd0f8d1eee6bc9fdc57f1923bed772fa3c946d) ([#9550](https://github.com/yt-dlp/yt-dlp/issues/9550)) by [bashonly](https://github.com/bashonly)
    - [Print SHA sums to GHA logs](https://github.com/yt-dlp/yt-dlp/commit/e8032503b9517465b0e86d776fc1e60d8795d673) ([#9582](https://github.com/yt-dlp/yt-dlp/issues/9582)) by [bashonly](https://github.com/bashonly)
    - [Update changelog for tarball and sdist](https://github.com/yt-dlp/yt-dlp/commit/17b96974a334688f76b57d350e07cae8cda46877) ([#9425](https://github.com/yt-dlp/yt-dlp/issues/9425)) by [bashonly](https://github.com/bashonly)
- **cleanup**
    - [Standardize `import datetime as dt`](https://github.com/yt-dlp/yt-dlp/commit/c305a25c1b16bcf7a5ec499c3b786ed1e2c748da) ([#8978](https://github.com/yt-dlp/yt-dlp/issues/8978)) by [pukkandan](https://github.com/pukkandan)
    - ie: [No `from` stdlib imports in extractors](https://github.com/yt-dlp/yt-dlp/commit/e3a3ed8a981d9395c4859b6ef56cd02bc3148db2) by [pukkandan](https://github.com/pukkandan)
    - Miscellaneous: [216f6a3](https://github.com/yt-dlp/yt-dlp/commit/216f6a3cb57824e6a3c859649ce058c199b1b247) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
- **docs**
    - [Update yt-dlp tagline](https://github.com/yt-dlp/yt-dlp/commit/388c979ac63a8774339fac2516fe1cc852b4276e) ([#9481](https://github.com/yt-dlp/yt-dlp/issues/9481)) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
    - [Various manpage fixes](https://github.com/yt-dlp/yt-dlp/commit/df0e138fc02ae2764a44f2f59fc93c756c4d3ee2) by [leoheitmannruiz](https://github.com/leoheitmannruiz)
- **test**
    - [Workaround websocket server hanging](https://github.com/yt-dlp/yt-dlp/commit/f849d77ab54788446b995d256e1ee0894c4fb927) ([#9467](https://github.com/yt-dlp/yt-dlp/issues/9467)) by [coletdjnz](https://github.com/coletdjnz)
    - `traversal`: [Separate traversal tests](https://github.com/yt-dlp/yt-dlp/commit/979ce2e786f2ee3fc783b6dc1ef4188d8805c923) ([#9574](https://github.com/yt-dlp/yt-dlp/issues/9574)) by [Grub4K](https://github.com/Grub4K)

### 2024.03.10

#### Core changes
- [Add `--compat-options 2023`](https://github.com/yt-dlp/yt-dlp/commit/3725b4f0c93ca3943e6300013a9670e4ab757fda) ([#9084](https://github.com/yt-dlp/yt-dlp/issues/9084)) by [Grub4K](https://github.com/Grub4K) (With fixes in [ffff1bc](https://github.com/yt-dlp/yt-dlp/commit/ffff1bc6598fc7a9258e51bc153cab812467f9f9) by [pukkandan](https://github.com/pukkandan))
- [Create `ydl._request_director` when needed](https://github.com/yt-dlp/yt-dlp/commit/069b2aedae2279668b6051627a81fc4fbd9c146a) by [pukkandan](https://github.com/pukkandan) (With fixes in [dbd8b1b](https://github.com/yt-dlp/yt-dlp/commit/dbd8b1bff9afd8f05f982bcd52c20bc173c266ca) by [Grub4k](https://github.com/Grub4k))
- [Don't select storyboard formats as fallback](https://github.com/yt-dlp/yt-dlp/commit/d63eae7e7ffb1f3e733e552b9e5e82355bfba214) by [bashonly](https://github.com/bashonly)
- [Handle `--load-info-json` format selection errors](https://github.com/yt-dlp/yt-dlp/commit/263a4b55ac17a796e8991ca8d2d86a3c349f8a60) ([#9392](https://github.com/yt-dlp/yt-dlp/issues/9392)) by [bashonly](https://github.com/bashonly)
- [Warn user when not launching through shell on Windows](https://github.com/yt-dlp/yt-dlp/commit/6a6cdcd1824a14e3b336332c8f31f65497b8c4b8) ([#9250](https://github.com/yt-dlp/yt-dlp/issues/9250)) by [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
- **cookies**
    - [Fix `--cookies-from-browser` for `snap` Firefox](https://github.com/yt-dlp/yt-dlp/commit/cbed249aaa053a3f425b9bafc97f8dbd71c44487) ([#9016](https://github.com/yt-dlp/yt-dlp/issues/9016)) by [Grub4K](https://github.com/Grub4K)
    - [Fix `--cookies-from-browser` with macOS Firefox profiles](https://github.com/yt-dlp/yt-dlp/commit/85b33f5c163f60dbd089a6b9bc2ba1366d3ddf93) ([#8909](https://github.com/yt-dlp/yt-dlp/issues/8909)) by [RalphORama](https://github.com/RalphORama)
    - [Improve error message for Windows `--cookies-from-browser chrome` issue](https://github.com/yt-dlp/yt-dlp/commit/2792092afd367e39251ace1fb2819c855ab8919f) ([#9080](https://github.com/yt-dlp/yt-dlp/issues/9080)) by [Grub4K](https://github.com/Grub4K)
- **plugins**: [Handle `PermissionError`](https://github.com/yt-dlp/yt-dlp/commit/9a8afadd172b7cab143f0049959fa64973589d94) ([#9229](https://github.com/yt-dlp/yt-dlp/issues/9229)) by [pukkandan](https://github.com/pukkandan), [syntaxsurge](https://github.com/syntaxsurge)
- **utils**
    - [Improve `repr` of `DateRange`, `match_filter_func`](https://github.com/yt-dlp/yt-dlp/commit/45491a2a30da4d1723cfa9288cb664813bb09afb) by [pukkandan](https://github.com/pukkandan)
    - `traverse_obj`: [Support `xml.etree.ElementTree.Element`](https://github.com/yt-dlp/yt-dlp/commit/ffbd4f2a02fee387ea5e0a267ce32df5259111ac) ([#8911](https://github.com/yt-dlp/yt-dlp/issues/8911)) by [Grub4K](https://github.com/Grub4K)
- **webvtt**: [Don't parse single fragment files](https://github.com/yt-dlp/yt-dlp/commit/f24e44e8cbd88ce338d52f594a19330f64d38b50) ([#9034](https://github.com/yt-dlp/yt-dlp/issues/9034)) by [seproDev](https://github.com/seproDev)

#### Extractor changes
- [Migrate commonly plural fields to lists](https://github.com/yt-dlp/yt-dlp/commit/104a7b5a46dc1805157fb4cc11c05876934d37c1) ([#8917](https://github.com/yt-dlp/yt-dlp/issues/8917)) by [llistochek](https://github.com/llistochek), [pukkandan](https://github.com/pukkandan) (With fixes in [b136e2a](https://github.com/yt-dlp/yt-dlp/commit/b136e2af341f7a88028aea4c5cd50efe2fa9b182) by [bashonly](https://github.com/bashonly))
- [Support multi-period MPD streams](https://github.com/yt-dlp/yt-dlp/commit/4ce57d3b873c2887814cbec03d029533e82f7db5) ([#6654](https://github.com/yt-dlp/yt-dlp/issues/6654)) by [alard](https://github.com/alard), [pukkandan](https://github.com/pukkandan)
- **abematv**
    - [Fix extraction with cache](https://github.com/yt-dlp/yt-dlp/commit/c51316f8a69fbd0080f2720777d42ab438e254a3) ([#8895](https://github.com/yt-dlp/yt-dlp/issues/8895)) by [sefidel](https://github.com/sefidel)
    - [Support login for playlists](https://github.com/yt-dlp/yt-dlp/commit/8226a3818f804478c756cf460baa9bf3a3b062a5) ([#8901](https://github.com/yt-dlp/yt-dlp/issues/8901)) by [sefidel](https://github.com/sefidel)
- **adn**
    - [Add support for German site](https://github.com/yt-dlp/yt-dlp/commit/5eb1458be4767385a9bf1d570ff08e46100cbaa2) ([#8708](https://github.com/yt-dlp/yt-dlp/issues/8708)) by [infanf](https://github.com/infanf)
    - [Improve auth error handling](https://github.com/yt-dlp/yt-dlp/commit/9526b1f179d19f75284eceaa5e0ee381af18cf19) ([#9068](https://github.com/yt-dlp/yt-dlp/issues/9068)) by [infanf](https://github.com/infanf)
- **aenetworks**: [Rating should be optional for AP extraction](https://github.com/yt-dlp/yt-dlp/commit/014cb5774d7afe624b6eb4e07f7be924b9e5e186) ([#9005](https://github.com/yt-dlp/yt-dlp/issues/9005)) by [agibson-fl](https://github.com/agibson-fl)
- **altcensored**: channel: [Fix playlist extraction](https://github.com/yt-dlp/yt-dlp/commit/e28e135d6fd6a430fed3e20dfe1a8c8bbc5f9185) ([#9297](https://github.com/yt-dlp/yt-dlp/issues/9297)) by [marcdumais](https://github.com/marcdumais)
- **amadeustv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e641aab7a61df7406df60ebfe0c77bd5186b2b41) ([#8744](https://github.com/yt-dlp/yt-dlp/issues/8744)) by [ArnauvGilotra](https://github.com/ArnauvGilotra)
- **ant1newsgrembed**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1ed5ee2f045f717e814f84ba461dadc58e712266) ([#9191](https://github.com/yt-dlp/yt-dlp/issues/9191)) by [seproDev](https://github.com/seproDev)
- **archiveorg**: [Fix format URL encoding](https://github.com/yt-dlp/yt-dlp/commit/3894ab9574748188bbacbd925a3971eda6fa2bb0) ([#9279](https://github.com/yt-dlp/yt-dlp/issues/9279)) by [bashonly](https://github.com/bashonly)
- **ard**
    - mediathek
        - [Revert to using old id](https://github.com/yt-dlp/yt-dlp/commit/b6951271ac014761c9c317b9cecd5e8e139cfa7c) ([#8916](https://github.com/yt-dlp/yt-dlp/issues/8916)) by [Grub4K](https://github.com/Grub4K)
        - [Support cookies to verify age](https://github.com/yt-dlp/yt-dlp/commit/c099ec9392b0283dde34b290d1a04158ad8eb882) ([#9037](https://github.com/yt-dlp/yt-dlp/issues/9037)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
- **art19**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/999ea80beb053491089d256104c4188aced3110f) ([#9099](https://github.com/yt-dlp/yt-dlp/issues/9099)) by [seproDev](https://github.com/seproDev)
- **artetv**: [Separate closed captions](https://github.com/yt-dlp/yt-dlp/commit/393b487a4ea391c44e811505ec98531031d7e81e) ([#8231](https://github.com/yt-dlp/yt-dlp/issues/8231)) by [Nicals](https://github.com/Nicals), [seproDev](https://github.com/seproDev)
- **asobichannel**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/12f042740550c06552819374e2251deb7a519bab) ([#8700](https://github.com/yt-dlp/yt-dlp/issues/8700)) by [Snack-X](https://github.com/Snack-X)
- **bigo**: [Fix JSON extraction](https://github.com/yt-dlp/yt-dlp/commit/85a2d07c1f82c2082b568963d1c32ad3fc848f61) ([#8893](https://github.com/yt-dlp/yt-dlp/issues/8893)) by [DmitryScaletta](https://github.com/DmitryScaletta)
- **bilibili**
    - [Add referer header and fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/1713c882730a928ac344c099874d2093fc2c8b51) ([#8832](https://github.com/yt-dlp/yt-dlp/issues/8832)) by [SirElderling](https://github.com/SirElderling) (With fixes in [f1570ab](https://github.com/yt-dlp/yt-dlp/commit/f1570ab84d5f49564256c620063d2d3e9ed4acf0) by [TobiX](https://github.com/TobiX))
    - [Support `--no-playlist`](https://github.com/yt-dlp/yt-dlp/commit/e439693f729daf6fb15457baea1bca10ef5da34d) ([#9139](https://github.com/yt-dlp/yt-dlp/issues/9139)) by [c-basalt](https://github.com/c-basalt)
- **bilibilisearch**: [Set cookie to fix extraction](https://github.com/yt-dlp/yt-dlp/commit/ffa017cfc5973b265c92248546fcf5020dc43eaf) ([#9119](https://github.com/yt-dlp/yt-dlp/issues/9119)) by [c-basalt](https://github.com/c-basalt)
- **biliintl**: [Fix and improve subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/cf6413e840476c15e5b166dc2f7cc2a90a4a9aad) ([#7077](https://github.com/yt-dlp/yt-dlp/issues/7077)) by [dirkf](https://github.com/dirkf), [HobbyistDev](https://github.com/HobbyistDev), [itachi-19](https://github.com/itachi-19), [seproDev](https://github.com/seproDev)
- **boosty**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/540b68298192874c75ad5ee4589bed64d02a7d55) ([#9144](https://github.com/yt-dlp/yt-dlp/issues/9144)) by [un-def](https://github.com/un-def)
- **ccma**: [Extract 1080p DASH formats](https://github.com/yt-dlp/yt-dlp/commit/4253e3b7f483127bd812bdac02466f4a5b47ff34) ([#9130](https://github.com/yt-dlp/yt-dlp/issues/9130)) by [seproDev](https://github.com/seproDev)
- **cctv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/6ad11fef65474bcf70f3a8556850d93c141e44a2) ([#9325](https://github.com/yt-dlp/yt-dlp/issues/9325)) by [src-tinkerer](https://github.com/src-tinkerer)
- **chzzk**
    - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/ba6b0c8261e9f0a6373885736ff90a89dd1fb614) ([#8887](https://github.com/yt-dlp/yt-dlp/issues/8887)) by [DmitryScaletta](https://github.com/DmitryScaletta)
    - live: [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/804f2366117b7065552a1c3cddb9ec19b688a5c1) ([#9309](https://github.com/yt-dlp/yt-dlp/issues/9309)) by [hui1601](https://github.com/hui1601)
- **cineverse**: [Detect when login required](https://github.com/yt-dlp/yt-dlp/commit/fc2cc626f07328a6c71b5e21853e4cfa7b1e6256) ([#9081](https://github.com/yt-dlp/yt-dlp/issues/9081)) by [garret1317](https://github.com/garret1317)
- **cloudflarestream**
    - [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/4d9dc0abe24ad5d9d22a16f40fc61137dcd103f7) ([#9007](https://github.com/yt-dlp/yt-dlp/issues/9007)) by [Bibhav48](https://github.com/Bibhav48)
    - [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/f3d5face83f948c24bcb91e06d4fa6e8622d7d79) ([#9280](https://github.com/yt-dlp/yt-dlp/issues/9280)) by [bashonly](https://github.com/bashonly)
    - [Improve embed detection](https://github.com/yt-dlp/yt-dlp/commit/464c919ea82aefdf35f138a1ab2dd0bb8fb7fd0e) ([#9287](https://github.com/yt-dlp/yt-dlp/issues/9287)) by [bashonly](https://github.com/bashonly)
- **cloudycdn, lsm**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/5dda3b291f59f388f953337e9fb09a94b64aaf34) ([#8643](https://github.com/yt-dlp/yt-dlp/issues/8643)) by [Caesim404](https://github.com/Caesim404)
- **cnbc**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/998dffb5a2343ec709b3d6bbf2bf019649080239) ([#8741](https://github.com/yt-dlp/yt-dlp/issues/8741)) by [gonzalezjo](https://github.com/gonzalezjo), [Noor-5](https://github.com/Noor-5), [ruiminggu](https://github.com/ruiminggu), [seproDev](https://github.com/seproDev), [zhijinwuu](https://github.com/zhijinwuu)
- **craftsy**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/96f3924bac174f2fd401f86f78e77d7e0c5ee008) ([#9384](https://github.com/yt-dlp/yt-dlp/issues/9384)) by [bashonly](https://github.com/bashonly)
- **crooksandliars**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/03536126d32bd861e38536371f0cd5f1b71dcb7a) ([#9192](https://github.com/yt-dlp/yt-dlp/issues/9192)) by [seproDev](https://github.com/seproDev)
- **crtvg**: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/785ab1af7f131e73444634ad57b39478651a43d3) ([#9404](https://github.com/yt-dlp/yt-dlp/issues/9404)) by [Xpl0itU](https://github.com/Xpl0itU)
- **dailymotion**: [Support search](https://github.com/yt-dlp/yt-dlp/commit/11ffa92a61e5847b3dfa8975f91ecb3ac2178841) ([#8292](https://github.com/yt-dlp/yt-dlp/issues/8292)) by [drzraf](https://github.com/drzraf), [seproDev](https://github.com/seproDev)
- **douyin**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9ff946645568e71046487571eefa9cb524a5189b) ([#9239](https://github.com/yt-dlp/yt-dlp/issues/9239)) by [114514ns](https://github.com/114514ns), [bashonly](https://github.com/bashonly) (With fixes in [e546e5d](https://github.com/yt-dlp/yt-dlp/commit/e546e5d3b33a50075e574a2e7b8eda7ea874d21e) by [bashonly](https://github.com/bashonly))
- **duboku**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/d3d4187da90a6b85f4ebae4bb07693cc9b412d75) ([#9161](https://github.com/yt-dlp/yt-dlp/issues/9161)) by [DmitryScaletta](https://github.com/DmitryScaletta)
- **dumpert**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/eedb38ce4093500e19279d50b708fb9c18bf4dbf) ([#9320](https://github.com/yt-dlp/yt-dlp/issues/9320)) by [rvsit](https://github.com/rvsit)
- **elementorembed**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6171b050d70435008e64fa06aa6f19c4e5bec75f) ([#8948](https://github.com/yt-dlp/yt-dlp/issues/8948)) by [pompos02](https://github.com/pompos02), [seproDev](https://github.com/seproDev)
- **eporner**: [Extract AV1 formats](https://github.com/yt-dlp/yt-dlp/commit/96d0f8c1cb8aec250c5614bfde6b5fb95f10819b) ([#9028](https://github.com/yt-dlp/yt-dlp/issues/9028)) by [michal-repo](https://github.com/michal-repo)
- **errjupiter**
    - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a514cc2feb1c3b265b19acab11487acad8bb3ab0) ([#8549](https://github.com/yt-dlp/yt-dlp/issues/8549)) by [glensc](https://github.com/glensc)
    - [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/80ed8bdeba5a945f127ef9ab055a4823329a1210) ([#9218](https://github.com/yt-dlp/yt-dlp/issues/9218)) by [glensc](https://github.com/glensc)
- **facebook**
    - [Add new ID format](https://github.com/yt-dlp/yt-dlp/commit/cf9af2c7f1fedd881a157b3fbe725e5494b00924) ([#3824](https://github.com/yt-dlp/yt-dlp/issues/3824)) by [kclauhk](https://github.com/kclauhk), [Wikidepia](https://github.com/Wikidepia)
    - [Improve extraction](https://github.com/yt-dlp/yt-dlp/commit/2e30b5567b5c6113d46b39163db5b044aea8667e) by [jingtra](https://github.com/jingtra), [ringus1](https://github.com/ringus1)
    - [Improve thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/3c4d3ee491b0ec22ed3cade51d943d3d27141ba7) ([#9060](https://github.com/yt-dlp/yt-dlp/issues/9060)) by [kclauhk](https://github.com/kclauhk)
    - [Set format HTTP chunk size](https://github.com/yt-dlp/yt-dlp/commit/5b68c478fb0b93ea6b8fac23f50e12217fa063db) ([#9058](https://github.com/yt-dlp/yt-dlp/issues/9058)) by [bashonly](https://github.com/bashonly), [kclauhk](https://github.com/kclauhk)
    - [Support events](https://github.com/yt-dlp/yt-dlp/commit/9b5efaf86b99a2664fff9fc725d275f766c3221d) ([#9055](https://github.com/yt-dlp/yt-dlp/issues/9055)) by [kclauhk](https://github.com/kclauhk)
    - [Support permalink URLs](https://github.com/yt-dlp/yt-dlp/commit/87286e93af949c4e6a0f8ba34af6a1ab5aa102b6) ([#9061](https://github.com/yt-dlp/yt-dlp/issues/9061)) by [kclauhk](https://github.com/kclauhk)
    - ads: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a40b0070c2a00d3ed839897462171a82323aa875) ([#8870](https://github.com/yt-dlp/yt-dlp/issues/8870)) by [kclauhk](https://github.com/kclauhk)
- **flextv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/4f043479090dc8a7e06e0bb53691e5414320dfb2) ([#9178](https://github.com/yt-dlp/yt-dlp/issues/9178)) by [DmitryScaletta](https://github.com/DmitryScaletta)
- **floatplane**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/9cd90447907a59c8a2727583f4a755fb23ed8cd3) ([#8934](https://github.com/yt-dlp/yt-dlp/issues/8934)) by [chtk](https://github.com/chtk)
- **francetv**
    - [Fix DAI livestreams](https://github.com/yt-dlp/yt-dlp/commit/e4fbe5f886a6693f2466877c12e99c30c5442ace) ([#9380](https://github.com/yt-dlp/yt-dlp/issues/9380)) by [bashonly](https://github.com/bashonly)
    - [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/9749ac7fecbfda391afbadf2870797ce0e382622) ([#9333](https://github.com/yt-dlp/yt-dlp/issues/9333)) by [bashonly](https://github.com/bashonly)
    - [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/ede624d1db649f5a4b61f8abbb746f365322de27) ([#9347](https://github.com/yt-dlp/yt-dlp/issues/9347)) by [bashonly](https://github.com/bashonly)
- **funk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/cd0443fb14e2ed805abb02792473457553a123d1) ([#9194](https://github.com/yt-dlp/yt-dlp/issues/9194)) by [seproDev](https://github.com/seproDev)
- **generic**: [Follow https redirects properly](https://github.com/yt-dlp/yt-dlp/commit/c8c9039e640495700f76a13496e3418bdd4382ba) ([#9121](https://github.com/yt-dlp/yt-dlp/issues/9121)) by [seproDev](https://github.com/seproDev)
- **getcourseru**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/4310b6650eeb5630295f4591b37720877878c57a) ([#8873](https://github.com/yt-dlp/yt-dlp/issues/8873)) by [divStar](https://github.com/divStar), [seproDev](https://github.com/seproDev)
- **gofile**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/77c2472ca1ef9050a66aa68bc5fa1bee88706c66) ([#9074](https://github.com/yt-dlp/yt-dlp/issues/9074)) by [jazz1611](https://github.com/jazz1611)
- **googledrive**: [Fix source file extraction](https://github.com/yt-dlp/yt-dlp/commit/5498729c59b03a9511c64552da3ba2f802166f8d) ([#8990](https://github.com/yt-dlp/yt-dlp/issues/8990)) by [jazz1611](https://github.com/jazz1611)
- **goplay**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7e90e34fa4617b53f8c8a9e69f460508cb1f51b0) ([#6654](https://github.com/yt-dlp/yt-dlp/issues/6654)) by [alard](https://github.com/alard)
- **gopro**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4a07a455bbf7acf87550053bbba949c828e350ba) ([#9019](https://github.com/yt-dlp/yt-dlp/issues/9019)) by [stilor](https://github.com/stilor)
- **ilpost**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/aa5dcc4ee65916a36cbe1b1b5b29b9110c3163ed) ([#9001](https://github.com/yt-dlp/yt-dlp/issues/9001)) by [CapacitorSet](https://github.com/CapacitorSet)
- **jiosaavnsong**: [Support more bitrates](https://github.com/yt-dlp/yt-dlp/commit/5154dc0a687528f995cde22b5ff63f82c740e98a) ([#8834](https://github.com/yt-dlp/yt-dlp/issues/8834)) by [alien-developers](https://github.com/alien-developers), [bashonly](https://github.com/bashonly)
- **kukululive**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/20cdad5a2c0499d5a6746f5466a2ab0c97b75884) ([#8877](https://github.com/yt-dlp/yt-dlp/issues/8877)) by [DmitryScaletta](https://github.com/DmitryScaletta)
- **lefigarovideoembed**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9401736fd08767c58af45a1e36ff5929c5fa1ac9) ([#9198](https://github.com/yt-dlp/yt-dlp/issues/9198)) by [seproDev](https://github.com/seproDev)
- **linkedin**: [Fix metadata and extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/017adb28e7fe7b8c8fc472332d86740f31141519) ([#9056](https://github.com/yt-dlp/yt-dlp/issues/9056)) by [barsnick](https://github.com/barsnick)
- **magellantv**: [Support episodes](https://github.com/yt-dlp/yt-dlp/commit/3dc9232e1aa58fe3c2d8cafb50e8162d6f0e891e) ([#9199](https://github.com/yt-dlp/yt-dlp/issues/9199)) by [seproDev](https://github.com/seproDev)
- **magentamusik**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5e2e24b2c5795756d81785b06b10723ddb6db7b2) ([#7790](https://github.com/yt-dlp/yt-dlp/issues/7790)) by [pwaldhauer](https://github.com/pwaldhauer), [seproDev](https://github.com/seproDev)
- **medaltv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/02e343f6ef6d7b3f9087ff69e4a1db0b4b4a5c5d) ([#9098](https://github.com/yt-dlp/yt-dlp/issues/9098)) by [Danish-H](https://github.com/Danish-H)
- **mlbarticle**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/50e06e21a68e336198198bda332b8e7d2314f201) ([#9021](https://github.com/yt-dlp/yt-dlp/issues/9021)) by [HobbyistDev](https://github.com/HobbyistDev)
- **motherless**: [Support uploader playlists](https://github.com/yt-dlp/yt-dlp/commit/9f1e9dab21bbe651544c8f4663b0e615dc450e4d) ([#8994](https://github.com/yt-dlp/yt-dlp/issues/8994)) by [dasidiot](https://github.com/dasidiot)
- **mujrozhlas**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/4170b3d7120e06db3391eef39c5add18a1ddf2c3) ([#9306](https://github.com/yt-dlp/yt-dlp/issues/9306)) by [bashonly](https://github.com/bashonly)
- **mx3**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/5a63454b3637b3603434026cddfeac509218b90e) ([#8736](https://github.com/yt-dlp/yt-dlp/issues/8736)) by [martinxyz](https://github.com/martinxyz)
- **naver**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/a281beba8d8f007cf220f96dd1d9412bb070c7d8) ([#8883](https://github.com/yt-dlp/yt-dlp/issues/8883)) by [seproDev](https://github.com/seproDev)
- **nebula**: [Support podcasts](https://github.com/yt-dlp/yt-dlp/commit/0de09c5b9ed619d4a93d7c451c6ddff0381de808) ([#9140](https://github.com/yt-dlp/yt-dlp/issues/9140)) by [c-basalt](https://github.com/c-basalt), [seproDev](https://github.com/seproDev)
- **nerdcubedfeed**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/29a74a6126101aabaa1726ae41b1ca55cf26e7a7) ([#9269](https://github.com/yt-dlp/yt-dlp/issues/9269)) by [seproDev](https://github.com/seproDev)
- **newgrounds**
    - [Fix login and clean up extraction](https://github.com/yt-dlp/yt-dlp/commit/0fcefb92f3ebfc5cada19c1e85a715f020d0f333) ([#9356](https://github.com/yt-dlp/yt-dlp/issues/9356)) by [Grub4K](https://github.com/Grub4K), [mrmedieval](https://github.com/mrmedieval)
    - user: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3e083191cdc34dd8c482da9a9b4bc682f824cb9d) ([#9046](https://github.com/yt-dlp/yt-dlp/issues/9046)) by [u-spec-png](https://github.com/u-spec-png)
- **nfb**: [Add support for onf.ca and series](https://github.com/yt-dlp/yt-dlp/commit/4b8b0dded8c65cd5b2ab2e858058ba98c9bf49ff) ([#8997](https://github.com/yt-dlp/yt-dlp/issues/8997)) by [bashonly](https://github.com/bashonly), [rrgomes](https://github.com/rrgomes)
- **nhkradiru**: [Extract extended description](https://github.com/yt-dlp/yt-dlp/commit/4392447d9404e3c25cfeb8f5bdfff31b0448da39) ([#9162](https://github.com/yt-dlp/yt-dlp/issues/9162)) by [garret1317](https://github.com/garret1317)
- **nhkradirulive**: [Make metadata extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/5af1f19787f7d652fce72dd3ab9536cdd980fe85) ([#8956](https://github.com/yt-dlp/yt-dlp/issues/8956)) by [garret1317](https://github.com/garret1317)
- **niconico**
    - [Remove legacy danmaku extraction](https://github.com/yt-dlp/yt-dlp/commit/974d444039c8bbffb57265c6792cd52d169fe1b9) ([#9209](https://github.com/yt-dlp/yt-dlp/issues/9209)) by [pzhlkj6612](https://github.com/pzhlkj6612)
    - [Support DMS formats](https://github.com/yt-dlp/yt-dlp/commit/aa13a8e3dd3b698cc40ec438988b1ad834e11a41) ([#9282](https://github.com/yt-dlp/yt-dlp/issues/9282)) by [pzhlkj6612](https://github.com/pzhlkj6612), [xpadev-net](https://github.com/xpadev-net) (With fixes in [40966e8](https://github.com/yt-dlp/yt-dlp/commit/40966e8da27bbf770dacf9be9363fcc3ad72cc9f) by [pzhlkj6612](https://github.com/pzhlkj6612))
- **ninaprotocol**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/62c65bfaf81e04e6746f6fdbafe384eb3edddfbc) ([#8946](https://github.com/yt-dlp/yt-dlp/issues/8946)) by [RaduManole](https://github.com/RaduManole), [seproDev](https://github.com/seproDev)
- **ninenews**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/43694ce13c5a9f1afca8b02b8b2b9b1576d6503d) ([#8840](https://github.com/yt-dlp/yt-dlp/issues/8840)) by [SirElderling](https://github.com/SirElderling)
- **nova**: [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/c168d8791d0974a8a8fcb3b4a4bc2d830df51622) ([#9221](https://github.com/yt-dlp/yt-dlp/issues/9221)) by [seproDev](https://github.com/seproDev)
- **ntvru**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7a29cbbd5fd7363e7e8535ee1506b7052465d13f) ([#9276](https://github.com/yt-dlp/yt-dlp/issues/9276)) by [bashonly](https://github.com/bashonly), [dirkf](https://github.com/dirkf)
- **nuum**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/acaf806c15f0a802ba286c23af02a10cf4bd4731) ([#8868](https://github.com/yt-dlp/yt-dlp/issues/8868)) by [DmitryScaletta](https://github.com/DmitryScaletta), [seproDev](https://github.com/seproDev)
- **nytimes**
    - [Extract timestamp](https://github.com/yt-dlp/yt-dlp/commit/05420227aaab60a39c0f9ade069c5862be36b1fa) ([#9142](https://github.com/yt-dlp/yt-dlp/issues/9142)) by [SirElderling](https://github.com/SirElderling)
    - [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/07256b9fee23960799024b95d5972abc7174aa81) ([#9075](https://github.com/yt-dlp/yt-dlp/issues/9075)) by [SirElderling](https://github.com/SirElderling)
- **onefootball**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/644738ddaa45428cb0babd41ead22454e5a2545e) ([#9222](https://github.com/yt-dlp/yt-dlp/issues/9222)) by [seproDev](https://github.com/seproDev)
- **openrec**: [Pass referer for m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/f591e605dfee4085ec007d6d056c943cbcacc429) ([#9253](https://github.com/yt-dlp/yt-dlp/issues/9253)) by [fireattack](https://github.com/fireattack)
- **orf**: on: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a0d50aabc5462aee302bd3f2663d3a3554875789) ([#9113](https://github.com/yt-dlp/yt-dlp/issues/9113)) by [HobbyistDev](https://github.com/HobbyistDev)
- **patreon**: [Fix embedded HLS extraction](https://github.com/yt-dlp/yt-dlp/commit/f0e8bc7c60b61fe18b63116c975609d76b904771) ([#8993](https://github.com/yt-dlp/yt-dlp/issues/8993)) by [johnvictorfs](https://github.com/johnvictorfs)
- **peertube**: [Update instances](https://github.com/yt-dlp/yt-dlp/commit/35d96982f1033e36215d323317981ee17e8ab0d5) ([#9070](https://github.com/yt-dlp/yt-dlp/issues/9070)) by [Chocobozzz](https://github.com/Chocobozzz)
- **piapro**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/8e6e3651727b0b85764857fc6329fe5e0a3f00de) ([#8999](https://github.com/yt-dlp/yt-dlp/issues/8999)) by [FinnRG](https://github.com/FinnRG)
- **playsuisse**: [Add login support](https://github.com/yt-dlp/yt-dlp/commit/cae6e461073fb7c32fd32052a3e6721447c469bc) ([#9077](https://github.com/yt-dlp/yt-dlp/issues/9077)) by [chkuendig](https://github.com/chkuendig)
- **pornhub**: [Fix login support](https://github.com/yt-dlp/yt-dlp/commit/de954c1b4d3a6db8a6525507e65303c7bb03f39f) ([#9227](https://github.com/yt-dlp/yt-dlp/issues/9227)) by [feederbox826](https://github.com/feederbox826)
- **pr0gramm**: [Enable POL filter and provide tags without login](https://github.com/yt-dlp/yt-dlp/commit/5f25f348f9eb5db842b1ec6799f95bebb7ba35a7) ([#9051](https://github.com/yt-dlp/yt-dlp/issues/9051)) by [Grub4K](https://github.com/Grub4K)
- **prankcastpost**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a2bac6b7adb7b0e955125838e20bb39eece630ce) ([#8933](https://github.com/yt-dlp/yt-dlp/issues/8933)) by [columndeeply](https://github.com/columndeeply)
- **radiko**: [Extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/e3ce2b385ec1f03fac9d4210c57fda77134495fc) ([#9115](https://github.com/yt-dlp/yt-dlp/issues/9115)) by [YoshichikaAAA](https://github.com/YoshichikaAAA)
- **rai**
    - [Filter unavailable formats](https://github.com/yt-dlp/yt-dlp/commit/f78814923748277e7067b796f25870686fb46205) ([#9189](https://github.com/yt-dlp/yt-dlp/issues/9189)) by [nixxo](https://github.com/nixxo)
    - [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/8f423cf8051fbfeedd57cca00d106012e6e86a97) ([#9291](https://github.com/yt-dlp/yt-dlp/issues/9291)) by [nixxo](https://github.com/nixxo)
- **redcdnlivx, sejm**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/fcaa2e735b00b15a2b0d9f55f4187c654b4b5b39) ([#8676](https://github.com/yt-dlp/yt-dlp/issues/8676)) by [selfisekai](https://github.com/selfisekai)
- **redtube**
    - [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c91d8b1899403daff6fc15206ad32de8db17fb8f) ([#9076](https://github.com/yt-dlp/yt-dlp/issues/9076)) by [jazz1611](https://github.com/jazz1611)
    - [Support redtube.com.br URLs](https://github.com/yt-dlp/yt-dlp/commit/4a6ff0b47a700dee3ee5c54804c31965308479ae) ([#9103](https://github.com/yt-dlp/yt-dlp/issues/9103)) by [jazz1611](https://github.com/jazz1611)
- **ridehome**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/cd7086c0d54ec1d7e02a30bd5bd934bdb2c54642) ([#8875](https://github.com/yt-dlp/yt-dlp/issues/8875)) by [SirElderling](https://github.com/SirElderling)
- **rinsefmartistplaylist**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1a36dbad712d359ec1c5b73d9bbbe562c03e9660) ([#8794](https://github.com/yt-dlp/yt-dlp/issues/8794)) by [SirElderling](https://github.com/SirElderling)
- **roosterteeth**
    - [Add Brightcove fallback](https://github.com/yt-dlp/yt-dlp/commit/b2cc150ad83ba20ceb2d6e73d09854eed3c2d05c) ([#9403](https://github.com/yt-dlp/yt-dlp/issues/9403)) by [bashonly](https://github.com/bashonly)
    - [Extract ad-free streams](https://github.com/yt-dlp/yt-dlp/commit/dd29e6e5fdf0f3758cb0829e73749832768f1a4e) ([#9355](https://github.com/yt-dlp/yt-dlp/issues/9355)) by [jkmartindale](https://github.com/jkmartindale)
    - [Extract release date and timestamp](https://github.com/yt-dlp/yt-dlp/commit/dfd8c0b69683b1c11beea039a96dd2949026c1d7) ([#9393](https://github.com/yt-dlp/yt-dlp/issues/9393)) by [bashonly](https://github.com/bashonly)
    - [Support bonus features](https://github.com/yt-dlp/yt-dlp/commit/8993721ecb34867b52b79f6e92b233008d1cbe78) ([#9406](https://github.com/yt-dlp/yt-dlp/issues/9406)) by [Bl4Cc4t](https://github.com/Bl4Cc4t)
- **rule34video**
    - [Extract `creators`](https://github.com/yt-dlp/yt-dlp/commit/3d9dc2f3590e10abf1561ebdaed96734a740587c) ([#9258](https://github.com/yt-dlp/yt-dlp/issues/9258)) by [gmes78](https://github.com/gmes78)
    - [Extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/fee2d8d9c38f9b5f0a8df347c1e698983339c34d) ([#7416](https://github.com/yt-dlp/yt-dlp/issues/7416)) by [gmes78](https://github.com/gmes78)
    - [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c0ecceeefe6ebd27452d9d8f20658f83ae121d04) ([#9044](https://github.com/yt-dlp/yt-dlp/issues/9044)) by [gmes78](https://github.com/gmes78)
- **rumblechannel**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0023af81fbce01984f35b34ecaf8562739831227) ([#9092](https://github.com/yt-dlp/yt-dlp/issues/9092)) by [Pranaxcau](https://github.com/Pranaxcau), [vista-narvas](https://github.com/vista-narvas)
- **screencastify**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/0bee29493ca8f91a0055a3706c7c94f5860188df) ([#9232](https://github.com/yt-dlp/yt-dlp/issues/9232)) by [seproDev](https://github.com/seproDev)
- **svtpage**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/ddd4b5e10a653bee78e656107710021c1b82934c) ([#8938](https://github.com/yt-dlp/yt-dlp/issues/8938)) by [diman8](https://github.com/diman8)
- **swearnet**: [Raise for login required](https://github.com/yt-dlp/yt-dlp/commit/b05640d532c43a52c0a0da096bb2dbd51e105ec0)
([#9281](https://github.com/yt-dlp/yt-dlp/issues/9281)) by [bashonly](https://github.com/bashonly) - **tiktok**: [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/d9b4154cbcb979d7e30af3a73b1bee422aae5aa3) ([#9327](https://github.com/yt-dlp/yt-dlp/issues/9327)) by [bashonly](https://github.com/bashonly) - **trtworld**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/8ab84650837e58046430c9f4b615c56a8886e071) ([#8701](https://github.com/yt-dlp/yt-dlp/issues/8701)) by [ufukk](https://github.com/ufukk) - **tvp**: [Support livestreams](https://github.com/yt-dlp/yt-dlp/commit/882e3b753c79c7799ce135c3a5edb72494b576af) ([#8860](https://github.com/yt-dlp/yt-dlp/issues/8860)) by [selfisekai](https://github.com/selfisekai) - **twitch**: [Fix m3u8 extraction](https://github.com/yt-dlp/yt-dlp/commit/5b8c69ae04444a4c80a5a99917e40f75a116c3b8) ([#8960](https://github.com/yt-dlp/yt-dlp/issues/8960)) by [DmitryScaletta](https://github.com/DmitryScaletta) - **twitter** - [Extract bitrate for HLS audio formats](https://github.com/yt-dlp/yt-dlp/commit/28e53d60df9b8aadd52a93504e30e885c9c35262) ([#9257](https://github.com/yt-dlp/yt-dlp/issues/9257)) by [bashonly](https://github.com/bashonly) - [Extract numeric `channel_id`](https://github.com/yt-dlp/yt-dlp/commit/55f1833376505ed1e4be0516b09bb3ea4425e8a4) ([#9263](https://github.com/yt-dlp/yt-dlp/issues/9263)) by [bashonly](https://github.com/bashonly) - **txxx**: [Extract thumbnails](https://github.com/yt-dlp/yt-dlp/commit/d79c7e9937c388c68b722ab7450960e43ef776d6) ([#9063](https://github.com/yt-dlp/yt-dlp/issues/9063)) by [shmohawk](https://github.com/shmohawk) - **utreon**: [Support playeur.com](https://github.com/yt-dlp/yt-dlp/commit/41d6b61e9852a5b97f47cc8a7718b31fb23f0aea) ([#9182](https://github.com/yt-dlp/yt-dlp/issues/9182)) by [DmitryScaletta](https://github.com/DmitryScaletta) - **vbox7**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/67bb70cd700c8d4c3149cd9e0539a5f32c3d1ce6) ([#9100](https://github.com/yt-dlp/yt-dlp/issues/9100)) by [seproDev](https://github.com/seproDev) - **viewlift**: [Add support for chorki.com](https://github.com/yt-dlp/yt-dlp/commit/41b6cdb4197aaf7ad82bdad6885eb5d5c64acd74) ([#9095](https://github.com/yt-dlp/yt-dlp/issues/9095)) by [NurTasin](https://github.com/NurTasin) - **vimeo** - [Extract `live_status` and `release_timestamp`](https://github.com/yt-dlp/yt-dlp/commit/f0426e9ca57dd14b82e6c13afc17947614f1e8eb) ([#9290](https://github.com/yt-dlp/yt-dlp/issues/9290)) by [pzhlkj6612](https://github.com/pzhlkj6612) - [Fix API headers](https://github.com/yt-dlp/yt-dlp/commit/8e765755f7f4909e1b535e61b7376b2d66e1ba6a) ([#9125](https://github.com/yt-dlp/yt-dlp/issues/9125)) by [bashonly](https://github.com/bashonly) - [Fix login](https://github.com/yt-dlp/yt-dlp/commit/2e8de097ad82da378e97005e8f1ff7e5aebca585) ([#9274](https://github.com/yt-dlp/yt-dlp/issues/9274)) by [bashonly](https://github.com/bashonly) - **viously**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/95e82347b398d8bb160767cdd975edecd62cbabd) ([#8927](https://github.com/yt-dlp/yt-dlp/issues/8927)) by [nbr23](https://github.com/nbr23), [seproDev](https://github.com/seproDev) - **youtube** - [Better error when all player responses are skipped](https://github.com/yt-dlp/yt-dlp/commit/5eedc208ec89d6284777060c94aadd06502338b9) ([#9083](https://github.com/yt-dlp/yt-dlp/issues/9083)) by [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan) - [Bump Android and iOS client 
versions](https://github.com/yt-dlp/yt-dlp/commit/413d3675804599bc8fe419c19e36490fd8f0b30f) ([#9317](https://github.com/yt-dlp/yt-dlp/issues/9317)) by [bashonly](https://github.com/bashonly) - [Further bump client versions](https://github.com/yt-dlp/yt-dlp/commit/7aad06541e543fa3452d3d2513e6f079aad1f99b) ([#9395](https://github.com/yt-dlp/yt-dlp/issues/9395)) by [bashonly](https://github.com/bashonly) - tab: [Fix `tags` extraction](https://github.com/yt-dlp/yt-dlp/commit/8828f4576bd862438d4fbf634f1d6ab18a217b0e) ([#9413](https://github.com/yt-dlp/yt-dlp/issues/9413)) by [x11x](https://github.com/x11x) - **zenporn**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f00c0def7434fac3c88503c2a77c4b2419b8e5ca) ([#8509](https://github.com/yt-dlp/yt-dlp/issues/8509)) by [SirElderling](https://github.com/SirElderling) - **zetland**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2f4b57594673035a59d72f7667588da848820034) ([#9116](https://github.com/yt-dlp/yt-dlp/issues/9116)) by [HobbyistDev](https://github.com/HobbyistDev) #### Downloader changes - **http**: [Reset resume length to handle `FileNotFoundError`](https://github.com/yt-dlp/yt-dlp/commit/2d91b9845621639c53dca7ee9d3d954f3624ba18) ([#8399](https://github.com/yt-dlp/yt-dlp/issues/8399)) by [boredzo](https://github.com/boredzo) #### Networking changes - [Remove `_CompatHTTPError`](https://github.com/yt-dlp/yt-dlp/commit/811d298b231cfa29e75c321b23a91d1c2b17602c) ([#8871](https://github.com/yt-dlp/yt-dlp/issues/8871)) by [coletdjnz](https://github.com/coletdjnz) - **Request Handler** - [Remove additional logging handlers on close](https://github.com/yt-dlp/yt-dlp/commit/0085e2bab8465ee7d46d16fcade3ed5e96cc8a48) ([#9032](https://github.com/yt-dlp/yt-dlp/issues/9032)) by [coletdjnz](https://github.com/coletdjnz) - requests: [Apply `remove_dot_segments` to absolute redirect locations](https://github.com/yt-dlp/yt-dlp/commit/35f4f764a786685ea45d84abe1cf1ad3847f4c97) by [coletdjnz](https://github.com/coletdjnz) #### Misc. 
changes - **build** - [Add `default` optional dependency group](https://github.com/yt-dlp/yt-dlp/commit/cf91400a1dd6cc99b11a6d163e1af73b64d618c9) ([#9295](https://github.com/yt-dlp/yt-dlp/issues/9295)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) - [Add transitional `setup.py` and `pyinst.py`](https://github.com/yt-dlp/yt-dlp/commit/0abf2f1f153ab47990edbeee3477dc55f74c7f89) ([#9296](https://github.com/yt-dlp/yt-dlp/issues/9296)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan) - [Bump `actions/upload-artifact` to v4 and adjust workflows](https://github.com/yt-dlp/yt-dlp/commit/3876429d72afb35247f4b2531eb9b16cfc7e0968) by [bashonly](https://github.com/bashonly) - [Bump `conda-incubator/setup-miniconda` to v3](https://github.com/yt-dlp/yt-dlp/commit/b0059f0413a6ba6ab0a3aec1f00188ce083cd8bf) by [bashonly](https://github.com/bashonly) - [Fix `secretstorage` for ARM builds](https://github.com/yt-dlp/yt-dlp/commit/920397634d1e84e76d2cb897bd6d69ba0c6bd5ca) by [bashonly](https://github.com/bashonly) - [Migrate to `pyproject.toml` and `hatchling`](https://github.com/yt-dlp/yt-dlp/commit/775cde82dc5b1dc64ab0539a92dd8c7ba6c0ad33) by [bashonly](https://github.com/bashonly) (With fixes in [43cfd46](https://github.com/yt-dlp/yt-dlp/commit/43cfd462c0d01eff22c1d4290aeb96eb1ea2c0e1)) - [Move bundle scripts into `bundle` submodule](https://github.com/yt-dlp/yt-dlp/commit/a1b778428991b1779203bac243ef4e9b6baea90c) by [bashonly](https://github.com/bashonly) - [Support failed build job re-runs](https://github.com/yt-dlp/yt-dlp/commit/eabbccc439720fba381919a88be4fe4d96464cbd) ([#9277](https://github.com/yt-dlp/yt-dlp/issues/9277)) by [bashonly](https://github.com/bashonly) - Makefile - [Add automated `CODE_FOLDERS` and `CODE_FILES`](https://github.com/yt-dlp/yt-dlp/commit/868d2f60a7cb59b410c8cbfb452cbdb072687b81) by [bashonly](https://github.com/bashonly) - [Ensure compatibility with BSD `make`](https://github.com/yt-dlp/yt-dlp/commit/beaa1a44554d04d9fe63a743a5bb4431ca778f28) ([#9210](https://github.com/yt-dlp/yt-dlp/issues/9210)) by [bashonly](https://github.com/bashonly) (With fixes in [73fcfa3](https://github.com/yt-dlp/yt-dlp/commit/73fcfa39f59113a8728249de2c4cee3025f17dc2)) - [Fix man pages generated by `pandoc>=3`](https://github.com/yt-dlp/yt-dlp/commit/fb44020fa98e47620b3aa1dab94b4c5b7bfb40bd) ([#7047](https://github.com/yt-dlp/yt-dlp/issues/7047)) by [t-nil](https://github.com/t-nil) - **ci**: [Bump `actions/setup-python` to v5](https://github.com/yt-dlp/yt-dlp/commit/b14e818b37f62e3224da157b3ad768b3f0815fcd) by [bashonly](https://github.com/bashonly) - **cleanup** - [Build files cleanup](https://github.com/yt-dlp/yt-dlp/commit/867f637b95b342e1cb9f1dc3c6cf0ffe727187ce) by [bashonly](https://github.com/bashonly) - [Fix infodict returned fields](https://github.com/yt-dlp/yt-dlp/commit/f4f9f6d00edcac6d4eb2b3fb78bf81326235d492) ([#8906](https://github.com/yt-dlp/yt-dlp/issues/8906)) by [seproDev](https://github.com/seproDev) - [Fix typo in README.md](https://github.com/yt-dlp/yt-dlp/commit/292d60b1ed3b9fe5bcb2775a894cca99b0f9473e) ([#8894](https://github.com/yt-dlp/yt-dlp/issues/8894)) by [antonkesy](https://github.com/antonkesy) - [Mark broken and remove dead extractors](https://github.com/yt-dlp/yt-dlp/commit/df773c3d5d1cc1f877cf8582f0072e386fc49318) ([#9238](https://github.com/yt-dlp/yt-dlp/issues/9238)) by [seproDev](https://github.com/seproDev) - [Match both `http` and `https` in 
`_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a687226b48f71b874fa18b0165ec528d591f53fb) ([#8968](https://github.com/yt-dlp/yt-dlp/issues/8968)) by [seproDev](https://github.com/seproDev) - [Remove unused code](https://github.com/yt-dlp/yt-dlp/commit/ed3bb2b0a12c44334e0d09481752dabf2ca1dc13) ([#8968](https://github.com/yt-dlp/yt-dlp/issues/8968)) by [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) - Miscellaneous - [93240fc](https://github.com/yt-dlp/yt-dlp/commit/93240fc1848de4a94f25844c96e0dcd282ef1d3b) by [bashonly](https://github.com/bashonly), [Grub4k](https://github.com/Grub4k), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) - [615a844](https://github.com/yt-dlp/yt-dlp/commit/615a84447e8322720be77a0e64298d7f42848693) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) - **devscripts** - `install_deps`: [Add script and migrate to it](https://github.com/yt-dlp/yt-dlp/commit/b8a433aaca86b15cb9f1a451b0f69371d2fc22a9) by [bashonly](https://github.com/bashonly) - `tomlparse`: [Add makeshift toml parser](https://github.com/yt-dlp/yt-dlp/commit/fd647775e27e030ab17387c249e2ebeba68f8ff0) by [Grub4K](https://github.com/Grub4K) - **docs**: [Misc Cleanup](https://github.com/yt-dlp/yt-dlp/commit/47ab66db0f083a76c7fba0f6e136b21dd5a93e3b) ([#8977](https://github.com/yt-dlp/yt-dlp/issues/8977)) by [Arthurszzz](https://github.com/Arthurszzz), [bashonly](https://github.com/bashonly), [Grub4k](https://github.com/Grub4k), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev) - **test** - [Skip source address tests if the address cannot be bound to](https://github.com/yt-dlp/yt-dlp/commit/69d31914952dd33082ac7019c6f76b43c45b9d06) ([#8900](https://github.com/yt-dlp/yt-dlp/issues/8900)) by [coletdjnz](https://github.com/coletdjnz) - websockets: [Fix timeout test on Windows](https://github.com/yt-dlp/yt-dlp/commit/ac340d0745a9de5d494033e3507ef624ba25add3) ([#9344](https://github.com/yt-dlp/yt-dlp/issues/9344)) by [seproDev](https://github.com/seproDev) ### 2023.12.30 #### Core changes - [Fix format selection parse error for CPython 3.12](https://github.com/yt-dlp/yt-dlp/commit/00cdda4f6fe18712ced13dbc64b7ea10f323e268) ([#8797](https://github.com/yt-dlp/yt-dlp/issues/8797)) by [Grub4K](https://github.com/Grub4K) - [Let `read_stdin` obey `--quiet`](https://github.com/yt-dlp/yt-dlp/commit/a174c453ee1e853c584ceadeac17eef2bd433dc5) by [pukkandan](https://github.com/pukkandan) - [Merged with youtube-dl be008e6](https://github.com/yt-dlp/yt-dlp/commit/65de7d204ce88c0225df1321060304baab85dbd8) by [bashonly](https://github.com/bashonly), [dirkf](https://github.com/dirkf), [Grub4K](https://github.com/Grub4K) - [Parse `release_year` from `release_date`](https://github.com/yt-dlp/yt-dlp/commit/1732eccc0a40256e076bf0435a29f0f1d8419280) ([#8524](https://github.com/yt-dlp/yt-dlp/issues/8524)) by [seproDev](https://github.com/seproDev) - [Release workflow and Updater cleanup](https://github.com/yt-dlp/yt-dlp/commit/632b8ee54eb2df8ac6e20746a0bd95b7ebb053aa) ([#8640](https://github.com/yt-dlp/yt-dlp/issues/8640)) by [bashonly](https://github.com/bashonly) - [Remove Python 3.7 support](https://github.com/yt-dlp/yt-dlp/commit/f4b95acafcd69a50040730dfdf732e797278fdcc) ([#8361](https://github.com/yt-dlp/yt-dlp/issues/8361)) by [bashonly](https://github.com/bashonly) - [Support `NO_COLOR` environment 
variable](https://github.com/yt-dlp/yt-dlp/commit/a0b19d319a6ce8b7059318fa17a34b144fde1785) ([#8385](https://github.com/yt-dlp/yt-dlp/issues/8385)) by [Grub4K](https://github.com/Grub4K), [prettykool](https://github.com/prettykool) - **outtmpl**: [Support multiplication](https://github.com/yt-dlp/yt-dlp/commit/993edd3f6e17e966c763bc86dc34125445cec6b6) by [pukkandan](https://github.com/pukkandan) - **utils**: `traverse_obj`: [Move `is_user_input` into output template](https://github.com/yt-dlp/yt-dlp/commit/0b6f829b1dfda15d3c1d7d1fbe4ea6102c26dd24) ([#8673](https://github.com/yt-dlp/yt-dlp/issues/8673)) by [Grub4K](https://github.com/Grub4K) - **webvtt**: [Allow spaces before newlines for CueBlock](https://github.com/yt-dlp/yt-dlp/commit/15f22b4880b6b3f71f350c64d70976ae65b9f1ca) ([#7681](https://github.com/yt-dlp/yt-dlp/issues/7681)) by [TSRBerry](https://github.com/TSRBerry) (With fixes in [298230e](https://github.com/yt-dlp/yt-dlp/commit/298230e550886b746c266724dd701d842ca2696e) by [pukkandan](https://github.com/pukkandan)) #### Extractor changes - [Add `media_type` field](https://github.com/yt-dlp/yt-dlp/commit/e370f9ec36972d06100a3db893b397bfc1b07b4d) by [trainman261](https://github.com/trainman261) - [Extract from `media` elements in SMIL manifests](https://github.com/yt-dlp/yt-dlp/commit/ddb2d7588bea48bae965dbfabe6df6550c9d3d43) ([#8504](https://github.com/yt-dlp/yt-dlp/issues/8504)) by [seproDev](https://github.com/seproDev) - **abematv**: [Fix season metadata](https://github.com/yt-dlp/yt-dlp/commit/cc07f5cc85d9e2a6cd0bedb9d961665eea0d6047) ([#8607](https://github.com/yt-dlp/yt-dlp/issues/8607)) by [middlingphys](https://github.com/middlingphys) - **allstar**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/3237f8ba29fe13bf95ff42b1e48b5b5109715feb) ([#8274](https://github.com/yt-dlp/yt-dlp/issues/8274)) by [S-Aarab](https://github.com/S-Aarab) - **altcensored**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3f90813f0617e0d21302398010de7496c9ae36aa) ([#8291](https://github.com/yt-dlp/yt-dlp/issues/8291)) by [drzraf](https://github.com/drzraf) - **ard**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/5f009a094f0e8450792b097c4c8273622778052d) ([#8878](https://github.com/yt-dlp/yt-dlp/issues/8878)) by [seproDev](https://github.com/seproDev) - **ardbetamediathek**: [Fix series extraction](https://github.com/yt-dlp/yt-dlp/commit/1f8bd8eba82ba10ddb49ee7cc0be4540dab103d5) ([#8687](https://github.com/yt-dlp/yt-dlp/issues/8687)) by [lstrojny](https://github.com/lstrojny) - **bbc** - [Extract more formats](https://github.com/yt-dlp/yt-dlp/commit/c919b68f7e79ea5010f75f648d3c9e45405a8011) ([#8321](https://github.com/yt-dlp/yt-dlp/issues/8321)) by [barsnick](https://github.com/barsnick), [dirkf](https://github.com/dirkf) - [Fix JSON parsing bug](https://github.com/yt-dlp/yt-dlp/commit/19741ab8a401ec64d5e84fdbfcfb141d105e7bc8) by [bashonly](https://github.com/bashonly) - **bfmtv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/4903f452b68efb62dadf22e81be8c7934fc743e7) ([#8651](https://github.com/yt-dlp/yt-dlp/issues/8651)) by [bashonly](https://github.com/bashonly) - **bilibili**: [Support courses and interactive videos](https://github.com/yt-dlp/yt-dlp/commit/9f09bdcfcb8e2b4b2decdc30d35d34b993bc7a94) ([#8343](https://github.com/yt-dlp/yt-dlp/issues/8343)) by [c-basalt](https://github.com/c-basalt) - **bitchute**: [Fix and improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/b1a1ec1540605d2ea7abdb63336ffb1c56bf6316) 
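
A minimal sketch of the template arithmetic from the **outtmpl** entry above; the template, option values and URL are illustrative assumptions, not taken from the changelog:

```python
# Illustrative only: numeric output-template fields support simple
# arithmetic, which now includes multiplication.
from yt_dlp import YoutubeDL

ydl_opts = {
    # e.g. doubling the playlist index used in the file name
    'outtmpl': '%(playlist_index*2)03d - %(title)s.%(ext)s',
}
with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://example.com/some-playlist'])  # placeholder URL
```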

#### Extractor changes
- [Add `media_type` field](https://github.com/yt-dlp/yt-dlp/commit/e370f9ec36972d06100a3db893b397bfc1b07b4d) by [trainman261](https://github.com/trainman261)
- [Extract from `media` elements in SMIL manifests](https://github.com/yt-dlp/yt-dlp/commit/ddb2d7588bea48bae965dbfabe6df6550c9d3d43) ([#8504](https://github.com/yt-dlp/yt-dlp/issues/8504)) by [seproDev](https://github.com/seproDev)
- **abematv**: [Fix season metadata](https://github.com/yt-dlp/yt-dlp/commit/cc07f5cc85d9e2a6cd0bedb9d961665eea0d6047) ([#8607](https://github.com/yt-dlp/yt-dlp/issues/8607)) by [middlingphys](https://github.com/middlingphys)
- **allstar**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/3237f8ba29fe13bf95ff42b1e48b5b5109715feb) ([#8274](https://github.com/yt-dlp/yt-dlp/issues/8274)) by [S-Aarab](https://github.com/S-Aarab)
- **altcensored**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3f90813f0617e0d21302398010de7496c9ae36aa) ([#8291](https://github.com/yt-dlp/yt-dlp/issues/8291)) by [drzraf](https://github.com/drzraf)
- **ard**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/5f009a094f0e8450792b097c4c8273622778052d) ([#8878](https://github.com/yt-dlp/yt-dlp/issues/8878)) by [seproDev](https://github.com/seproDev)
- **ardbetamediathek**: [Fix series extraction](https://github.com/yt-dlp/yt-dlp/commit/1f8bd8eba82ba10ddb49ee7cc0be4540dab103d5) ([#8687](https://github.com/yt-dlp/yt-dlp/issues/8687)) by [lstrojny](https://github.com/lstrojny)
- **bbc**
    - [Extract more formats](https://github.com/yt-dlp/yt-dlp/commit/c919b68f7e79ea5010f75f648d3c9e45405a8011) ([#8321](https://github.com/yt-dlp/yt-dlp/issues/8321)) by [barsnick](https://github.com/barsnick), [dirkf](https://github.com/dirkf)
    - [Fix JSON parsing bug](https://github.com/yt-dlp/yt-dlp/commit/19741ab8a401ec64d5e84fdbfcfb141d105e7bc8) by [bashonly](https://github.com/bashonly)
- **bfmtv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/4903f452b68efb62dadf22e81be8c7934fc743e7) ([#8651](https://github.com/yt-dlp/yt-dlp/issues/8651)) by [bashonly](https://github.com/bashonly)
- **bilibili**: [Support courses and interactive videos](https://github.com/yt-dlp/yt-dlp/commit/9f09bdcfcb8e2b4b2decdc30d35d34b993bc7a94) ([#8343](https://github.com/yt-dlp/yt-dlp/issues/8343)) by [c-basalt](https://github.com/c-basalt)
- **bitchute**: [Fix and improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/b1a1ec1540605d2ea7abdb63336ffb1c56bf6316) ([#8507](https://github.com/yt-dlp/yt-dlp/issues/8507)) by [SirElderling](https://github.com/SirElderling)
- **box**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/5a230233d6fce06f4abd1fce0dc92b948e6f780b) ([#8649](https://github.com/yt-dlp/yt-dlp/issues/8649)) by [bashonly](https://github.com/bashonly)
- **bundestag**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/00a3e47bf5440c96025a76e08337ff2a475ed83e) ([#8783](https://github.com/yt-dlp/yt-dlp/issues/8783)) by [Grub4K](https://github.com/Grub4K)
- **drtv**: [Set default ext for m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/f96ab86cd837b1b5823baa87d144e15322ee9298) ([#8590](https://github.com/yt-dlp/yt-dlp/issues/8590)) by [seproDev](https://github.com/seproDev)
- **duoplay**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/66a0127d45033c698bdbedf162cddc55d9e7b906) ([#8542](https://github.com/yt-dlp/yt-dlp/issues/8542)) by [glensc](https://github.com/glensc)
- **eplus**: [Add login support and DRM detection](https://github.com/yt-dlp/yt-dlp/commit/d5d1517e7d838500800d193ac3234b06e89654cd) ([#8661](https://github.com/yt-dlp/yt-dlp/issues/8661)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **facebook**
    - [Fix Memories extraction](https://github.com/yt-dlp/yt-dlp/commit/c39358a54bc6675ae0c50b81024e5a086e41656a) ([#8681](https://github.com/yt-dlp/yt-dlp/issues/8681)) by [kclauhk](https://github.com/kclauhk)
    - [Improve subtitles extraction](https://github.com/yt-dlp/yt-dlp/commit/9cafb9ff17e14475a35c9a58b5bb010c86c9db4b) ([#8296](https://github.com/yt-dlp/yt-dlp/issues/8296)) by [kclauhk](https://github.com/kclauhk)
- **floatplane**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/628fa244bbce2ad39775a5959e99588f30cac152) ([#8639](https://github.com/yt-dlp/yt-dlp/issues/8639)) by [seproDev](https://github.com/seproDev)
- **francetv**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/71f28097fec1c9e029f74b68a4eadc8915399840) ([#8409](https://github.com/yt-dlp/yt-dlp/issues/8409)) by [Fymyte](https://github.com/Fymyte)
- **instagram**: [Fix stories extraction](https://github.com/yt-dlp/yt-dlp/commit/50eaea9fd7787546b53660e736325fa31c77765d) ([#8843](https://github.com/yt-dlp/yt-dlp/issues/8843)) by [bashonly](https://github.com/bashonly)
- **joqrag**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/db8b4edc7d0bd27da462f6fe82ff6e13e3d68a04) ([#8384](https://github.com/yt-dlp/yt-dlp/issues/8384)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **litv**: [Fix premium content extraction](https://github.com/yt-dlp/yt-dlp/commit/f45c4efcd928a173e1300a8f1ce4258e70c969b1) ([#8842](https://github.com/yt-dlp/yt-dlp/issues/8842)) by [bashonly](https://github.com/bashonly)
- **maariv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c5f01bf7d4b9426c87c3f8248de23934a56579e0) ([#8331](https://github.com/yt-dlp/yt-dlp/issues/8331)) by [amir16yp](https://github.com/amir16yp)
- **mediastream**: [Fix authenticated format extraction](https://github.com/yt-dlp/yt-dlp/commit/b03c89309eb141be1a1eceeeb7475dd3b7529ad9) ([#8657](https://github.com/yt-dlp/yt-dlp/issues/8657)) by [NickCis](https://github.com/NickCis)
- **nebula**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/45d82be65f71bb05506bd55376c6fdb36bc54142) ([#8566](https://github.com/yt-dlp/yt-dlp/issues/8566)) by [elyse0](https://github.com/elyse0), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
- **nintendo**: [Fix Nintendo Direct extraction](https://github.com/yt-dlp/yt-dlp/commit/1d24da6c899ef280d8b0a48a5e280ecd5d39cdf4) ([#8609](https://github.com/yt-dlp/yt-dlp/issues/8609)) by [Grub4K](https://github.com/Grub4K)
- **ondemandkorea**: [Fix upgraded format extraction](https://github.com/yt-dlp/yt-dlp/commit/04a5e06350e3ef7c03f94f2f3f90dd96c6411152) ([#8677](https://github.com/yt-dlp/yt-dlp/issues/8677)) by [seproDev](https://github.com/seproDev)
- **pr0gramm**: [Support variant formats and subtitles](https://github.com/yt-dlp/yt-dlp/commit/f98a3305eb124a0c375d03209d5c5a64fe1766c8) ([#8674](https://github.com/yt-dlp/yt-dlp/issues/8674)) by [Grub4K](https://github.com/Grub4K)
- **rinsefm**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c91af948e43570025e4aa887e248fd025abae394) ([#8778](https://github.com/yt-dlp/yt-dlp/issues/8778)) by [hashFactory](https://github.com/hashFactory)
- **rudovideo**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/0d531c35eca4c2eb36e160530a7a333edbc727cc) ([#8664](https://github.com/yt-dlp/yt-dlp/issues/8664)) by [nicodato](https://github.com/nicodato)
- **theguardian**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/1fa3f24d4b5d22176b11d78420f1f4b64a5af0a8) ([#8535](https://github.com/yt-dlp/yt-dlp/issues/8535)) by [SirElderling](https://github.com/SirElderling)
- **theplatform**: [Extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/7e09c147fdccb44806bbf601573adc4b77210a89) ([#8635](https://github.com/yt-dlp/yt-dlp/issues/8635)) by [trainman261](https://github.com/trainman261)
- **twitcasting**: [Detect livestreams via API and `show` page](https://github.com/yt-dlp/yt-dlp/commit/585d0ed9abcfcb957f2b2684b8ad43c3af160383) ([#8601](https://github.com/yt-dlp/yt-dlp/issues/8601)) by [bashonly](https://github.com/bashonly), [JC-Chung](https://github.com/JC-Chung)
- **twitcastinguser**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/ff2fde1b8f922fd34bae6172602008cd67c07c93) ([#8650](https://github.com/yt-dlp/yt-dlp/issues/8650)) by [bashonly](https://github.com/bashonly)
- **twitter**
    - [Extract stale tweets](https://github.com/yt-dlp/yt-dlp/commit/1c54a98e19d047e7c15184237b6ef8ad50af489c) ([#8724](https://github.com/yt-dlp/yt-dlp/issues/8724)) by [bashonly](https://github.com/bashonly)
    - [Prioritize m3u8 formats](https://github.com/yt-dlp/yt-dlp/commit/e7d22348e77367740da78a3db27167ecf894b7c9) ([#8826](https://github.com/yt-dlp/yt-dlp/issues/8826)) by [bashonly](https://github.com/bashonly)
    - [Work around API rate-limit](https://github.com/yt-dlp/yt-dlp/commit/116c268438ea4d3738f6fa502c169081ca8f0ee7) ([#8825](https://github.com/yt-dlp/yt-dlp/issues/8825)) by [bashonly](https://github.com/bashonly)
    - broadcast: [Extract `concurrent_view_count`](https://github.com/yt-dlp/yt-dlp/commit/6fe82491ed622b948c512cf4aab46ac3a234ae0a) ([#8600](https://github.com/yt-dlp/yt-dlp/issues/8600)) by [sonmezberkay](https://github.com/sonmezberkay)
- **vidly**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/34df1c1f60fa652c0a6a5c712b06c10e45daf6b7) ([#8612](https://github.com/yt-dlp/yt-dlp/issues/8612)) by [seproDev](https://github.com/seproDev)
- **vocaroo**: [Do not use deprecated `getheader`](https://github.com/yt-dlp/yt-dlp/commit/f223b1b0789f65e06619dcc9fc9e74f50d259379) ([#8606](https://github.com/yt-dlp/yt-dlp/issues/8606)) by [qbnu](https://github.com/qbnu)
- **vvvvid**: [Set user-agent to fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1725e943b0e8a8b585305660d4611e684374409c) ([#8615](https://github.com/yt-dlp/yt-dlp/issues/8615)) by [Kyraminol](https://github.com/Kyraminol)
- **youtube**
    - [Fix `like_count` extraction](https://github.com/yt-dlp/yt-dlp/commit/6b5d93b0b0240e287389d1d43b2d5293e18aa4cc) ([#8763](https://github.com/yt-dlp/yt-dlp/issues/8763)) by [Ganesh910](https://github.com/Ganesh910)
    - [Improve detection of faulty HLS formats](https://github.com/yt-dlp/yt-dlp/commit/bb5a54e6db2422bbd155d93a0e105b6616c09467) ([#8646](https://github.com/yt-dlp/yt-dlp/issues/8646)) by [bashonly](https://github.com/bashonly)
    - [Return empty playlist when channel/tab has no videos](https://github.com/yt-dlp/yt-dlp/commit/044886c220620a7679109e92352890e18b6079e3) by [pukkandan](https://github.com/pukkandan)
    - [Support cf.piped.video](https://github.com/yt-dlp/yt-dlp/commit/6a9c7a2b52655bacfa7ab2da24fd0d14a6fff495) ([#8514](https://github.com/yt-dlp/yt-dlp/issues/8514)) by [OIRNOIR](https://github.com/OIRNOIR)
- **zingmp3**: [Add support for radio and podcasts](https://github.com/yt-dlp/yt-dlp/commit/64de1a4c25bada90374b88d7353754fe8fbfcc51) ([#7189](https://github.com/yt-dlp/yt-dlp/issues/7189)) by [hatienl0i261299](https://github.com/hatienl0i261299)

#### Postprocessor changes
- **ffmpegmetadata**: [Embed stream metadata in single format downloads](https://github.com/yt-dlp/yt-dlp/commit/deeb13eae82e60f82a2c0c5861f460399a997528) ([#8647](https://github.com/yt-dlp/yt-dlp/issues/8647)) by [bashonly](https://github.com/bashonly)

#### Networking changes
- [Strip whitespace around header values](https://github.com/yt-dlp/yt-dlp/commit/196eb0fe77b78e2e5ca02c506c3837c2b1a7964c) ([#8802](https://github.com/yt-dlp/yt-dlp/issues/8802)) by [coletdjnz](https://github.com/coletdjnz)
- **Request Handler**: websockets: [Migrate websockets to networking framework](https://github.com/yt-dlp/yt-dlp/commit/ccfd70f4c24b579c72123ca76ab50164f8f122b7) ([#7720](https://github.com/yt-dlp/yt-dlp/issues/7720)) by [coletdjnz](https://github.com/coletdjnz)

#### Misc. changes
- **ci**
    - [Concurrency optimizations](https://github.com/yt-dlp/yt-dlp/commit/f124fa458826308afc86cf364c509f857686ecfd) ([#8614](https://github.com/yt-dlp/yt-dlp/issues/8614)) by [Grub4K](https://github.com/Grub4K)
    - [Run core tests only for core changes](https://github.com/yt-dlp/yt-dlp/commit/13b3cb3c2b7169a1e17d6fc62593bf744170521c) ([#8841](https://github.com/yt-dlp/yt-dlp/issues/8841)) by [Grub4K](https://github.com/Grub4K)
- **cleanup**
    - [Fix spelling of `IE_NAME`](https://github.com/yt-dlp/yt-dlp/commit/bc4ab17b38f01000d99c5c2bedec89721fee65ec) ([#8810](https://github.com/yt-dlp/yt-dlp/issues/8810)) by [barsnick](https://github.com/barsnick)
    - [Remove dead extractors](https://github.com/yt-dlp/yt-dlp/commit/9751a457cfdb18bf99d9ee0d10e4e6a594502bbf) ([#8604](https://github.com/yt-dlp/yt-dlp/issues/8604)) by [seproDev](https://github.com/seproDev)
    - Miscellaneous: [f9fb3ce](https://github.com/yt-dlp/yt-dlp/commit/f9fb3ce86e3c6a0c3c33b45392b8d7288bceba76) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan), [seproDev](https://github.com/seproDev)
- **devscripts**: `run_tests`: [Create Python script](https://github.com/yt-dlp/yt-dlp/commit/2d1d683a541d71f3d3bb999dfe8eeb1976fb91ce) ([#8720](https://github.com/yt-dlp/yt-dlp/issues/8720)) by [Grub4K](https://github.com/Grub4K) (With fixes in [225cf2b](https://github.com/yt-dlp/yt-dlp/commit/225cf2b830a1de2c5eacd257edd2a01aed1e1114))
- **docs**: [Update youtube-dl merge commit in `README.md`](https://github.com/yt-dlp/yt-dlp/commit/f10589e3453009bb523f55849bba144c9b91cf2a) by [bashonly](https://github.com/bashonly)
- **test**: networking: [Update tests for OpenSSL 3.2](https://github.com/yt-dlp/yt-dlp/commit/37755a037e612bfc608c3d4722e8ef2ce6a022ee) ([#8814](https://github.com/yt-dlp/yt-dlp/issues/8814)) by [bashonly](https://github.com/bashonly)

### 2023.11.16

#### Extractor changes
- **abc.net.au**: iview, showseries: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/15cb3528cbda7b6198f49a6b5953c226d701696b) ([#8586](https://github.com/yt-dlp/yt-dlp/issues/8586)) by [bashonly](https://github.com/bashonly)
- **beatbump**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/21dc069bea2d4d99345dd969e098f4535c751d45) ([#8576](https://github.com/yt-dlp/yt-dlp/issues/8576)) by [seproDev](https://github.com/seproDev)
- **dailymotion**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a489f071508ec5caf5f32052d142afe86c28df7a) ([#7692](https://github.com/yt-dlp/yt-dlp/issues/7692)) by [TravisDupes](https://github.com/TravisDupes)
- **drtv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/0783fd558ed0d3a8bc754beb75a406256f8b97b2) ([#8484](https://github.com/yt-dlp/yt-dlp/issues/8484)) by [almx](https://github.com/almx), [seproDev](https://github.com/seproDev)
- **eltrecetv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/dcfad52812aa8ce007cefbfbe63f58b49f6b1046) ([#8216](https://github.com/yt-dlp/yt-dlp/issues/8216)) by [elivinsky](https://github.com/elivinsky)
- **jiosaavn**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b530118e7f48232cacf8050d79a6b20bdfcf5468) ([#8307](https://github.com/yt-dlp/yt-dlp/issues/8307)) by [awalgarg](https://github.com/awalgarg)
- **njpwworld**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/e569c2d1f4b665795a2b64f0aaf7f76930664233) ([#8570](https://github.com/yt-dlp/yt-dlp/issues/8570)) by [aarubui](https://github.com/aarubui)
- **tv5mondeplus**: [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/0f634dba3afdc429ece8839b02f6d56c27b7973a) ([#4209](https://github.com/yt-dlp/yt-dlp/issues/4209)) by [FrankZ85](https://github.com/FrankZ85)
- **twitcasting**: [Fix livestream detection](https://github.com/yt-dlp/yt-dlp/commit/2325d03aa7bb80f56ba52cd6992258e44727b424) ([#8574](https://github.com/yt-dlp/yt-dlp/issues/8574)) by [JC-Chung](https://github.com/JC-Chung)
- **zenyandex**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5efe68b73cbf6e907c2e6a3aa338664385084184) ([#8454](https://github.com/yt-dlp/yt-dlp/issues/8454)) by [starius](https://github.com/starius)

#### Misc. changes
- **build**: [Make `secretstorage` an optional dependency](https://github.com/yt-dlp/yt-dlp/commit/24f827875c6ba513f12ed09a3aef2bbed223760d) ([#8585](https://github.com/yt-dlp/yt-dlp/issues/8585)) by [bashonly](https://github.com/bashonly)

### 2023.11.14

#### Important changes
- **The release channels have been adjusted!**
    * [`master`](https://github.com/yt-dlp/yt-dlp-master-builds) builds are made after each push, containing the latest fixes (but also possibly bugs). This was previously the `nightly` channel.
    * [`nightly`](https://github.com/yt-dlp/yt-dlp-nightly-builds) builds are now made once a day, if there were any changes.
- Security: [[CVE-2023-46121](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-46121)] Patch [Generic Extractor MITM Vulnerability via Arbitrary Proxy Injection](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3ch3-jhc6-5r8x)
    - Disallow smuggling of arbitrary `http_headers`; extractors now only use specific headers
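
For context, a minimal sketch of the user-facing option this hardening concerns; the header and URL values are placeholders, and the internal behavior is as described in the advisory, not reproduced here:

```python
# Custom headers are still supplied via the documented `http_headers` option;
# after this fix, extractors no longer forward arbitrary smuggled headers
# to secondary requests, only a specific allowed set.
from yt_dlp import YoutubeDL

ydl_opts = {
    'http_headers': {'Referer': 'https://example.com/'},  # placeholder
}
with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://example.com/video'])  # placeholder URL
```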

#### Core changes
- [Add `--compat-option manifest-filesize-approx`](https://github.com/yt-dlp/yt-dlp/commit/10025b715ea01489557eb2c5a3cc04d361fcdb52) ([#8356](https://github.com/yt-dlp/yt-dlp/issues/8356)) by [bashonly](https://github.com/bashonly)
- [Fix format sorting with `--load-info-json`](https://github.com/yt-dlp/yt-dlp/commit/595ea4a99b726b8fe9463e7853b7053978d0544e) ([#8521](https://github.com/yt-dlp/yt-dlp/issues/8521)) by [bashonly](https://github.com/bashonly)
- [Include build origin in verbose output](https://github.com/yt-dlp/yt-dlp/commit/20314dd46f25e0e0a7e985a7804049aefa8b909f) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- [Only ensure playlist thumbnail dir if writing thumbs](https://github.com/yt-dlp/yt-dlp/commit/a40e0b37dfc8c26916b0e01aa3f29f3bc42250b6) ([#8373](https://github.com/yt-dlp/yt-dlp/issues/8373)) by [bashonly](https://github.com/bashonly)
- **update**: [Overhaul self-updater](https://github.com/yt-dlp/yt-dlp/commit/0b6ad22e6a432006a75df968f0283e6c6b3cfae6) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)

#### Extractor changes
- [Do not smuggle `http_headers`](https://github.com/yt-dlp/yt-dlp/commit/f04b5bedad7b281bee9814686bba1762bae092eb) by [coletdjnz](https://github.com/coletdjnz)
- [Do not test truth value of `xml.etree.ElementTree.Element`](https://github.com/yt-dlp/yt-dlp/commit/d4f14a72dc1dd79396e0e80980268aee902b61e4) ([#8582](https://github.com/yt-dlp/yt-dlp/issues/8582)) by [bashonly](https://github.com/bashonly)
- **brilliantpala**: [Fix cookies support](https://github.com/yt-dlp/yt-dlp/commit/9b5bedf13a3323074daceb0ec6ebb3cc6e0b9684) ([#8352](https://github.com/yt-dlp/yt-dlp/issues/8352)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **generic**: [Improve direct video link ext detection](https://github.com/yt-dlp/yt-dlp/commit/4ce2f29a50fcfb9920e6f2ffe42192945a2bad7e) ([#8340](https://github.com/yt-dlp/yt-dlp/issues/8340)) by [bashonly](https://github.com/bashonly)
- **laxarxames**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/312a2d1e8bc247264f9d85c5ec764e33aa0133b5) ([#8412](https://github.com/yt-dlp/yt-dlp/issues/8412)) by [aniolpages](https://github.com/aniolpages)
- **n-tv.de**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/8afd9468b0c822843bc480d366d1c86698daabfb) ([#8414](https://github.com/yt-dlp/yt-dlp/issues/8414)) by [1100101](https://github.com/1100101)
- **neteasemusic**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/46acc418a53470b7f32581b3309c3cb87aa8488d) ([#8531](https://github.com/yt-dlp/yt-dlp/issues/8531)) by [LoserFox](https://github.com/LoserFox)
- **nhk**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/54579be4364e148277c32e20a5c3efc2c3f52f5b) ([#8388](https://github.com/yt-dlp/yt-dlp/issues/8388)) by [garret1317](https://github.com/garret1317)
- **novaembed**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/3ff494f6f41c27549420fa88be27555bd449ffdc) ([#8368](https://github.com/yt-dlp/yt-dlp/issues/8368)) by [peci1](https://github.com/peci1)
- **npo**: [Send `POST` request to streams API endpoint](https://github.com/yt-dlp/yt-dlp/commit/8e02a4dcc800f9444e9d461edc41edd7b662f435) ([#8413](https://github.com/yt-dlp/yt-dlp/issues/8413)) by [bartbroere](https://github.com/bartbroere)
- **ondemandkorea**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/05adfd883a4f2ecae0267e670a62a2e45c351aeb) ([#8386](https://github.com/yt-dlp/yt-dlp/issues/8386)) by [seproDev](https://github.com/seproDev)
- **orf**: podcast: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6ba3085616652cbf05d1858efc321fdbfc4c6119) ([#8486](https://github.com/yt-dlp/yt-dlp/issues/8486)) by [Esokrates](https://github.com/Esokrates)
- **polskieradio**: audition: [Fix playlist extraction](https://github.com/yt-dlp/yt-dlp/commit/464327acdb353ceb91d2115163a5a9621b22fe0d) ([#8459](https://github.com/yt-dlp/yt-dlp/issues/8459)) by [shubhexists](https://github.com/shubhexists)
- **qdance**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/177f0d963e4b9db749805c482e6f288354c8be84) ([#8426](https://github.com/yt-dlp/yt-dlp/issues/8426)) by [bashonly](https://github.com/bashonly)
- **radiocomercial**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/ef12dbdcd3e7264bd3d744c1e3107597bd23ad35) ([#8508](https://github.com/yt-dlp/yt-dlp/issues/8508)) by [SirElderling](https://github.com/SirElderling)
- **sbs.co.kr**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/25a4bd345a0dcfece6fef752d4537eb403da94d9) ([#8326](https://github.com/yt-dlp/yt-dlp/issues/8326)) by [seproDev](https://github.com/seproDev)
- **theatercomplextown**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/2863fcf2b6876d0c7965ff7d6d9242eea653dc6b) ([#8560](https://github.com/yt-dlp/yt-dlp/issues/8560)) by [bashonly](https://github.com/bashonly)
- **thisav**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/cb480e390d85fb3a598c1b6d5eef3438ce729fc9) ([#8346](https://github.com/yt-dlp/yt-dlp/issues/8346)) by [bashonly](https://github.com/bashonly)
- **thisoldhouse**: [Add login support](https://github.com/yt-dlp/yt-dlp/commit/c76c96677ff6a056f5844a568ef05ee22c46d6f4) ([#8561](https://github.com/yt-dlp/yt-dlp/issues/8561)) by [bashonly](https://github.com/bashonly)
- **twitcasting**: [Fix livestream extraction](https://github.com/yt-dlp/yt-dlp/commit/7b8b1cf5eb8bf44ce70bc24e1f56f0dba2737e98) ([#8427](https://github.com/yt-dlp/yt-dlp/issues/8427)) by [JC-Chung](https://github.com/JC-Chung), [saintliao](https://github.com/saintliao)
- **twitter**
    - broadcast
        - [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/7d337ca977d73a0a6c07ab481ed8faa8f6ff8726) ([#8383](https://github.com/yt-dlp/yt-dlp/issues/8383)) by [HitomaruKonpaku](https://github.com/HitomaruKonpaku)
        - [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/f6e97090d2ed9e05441ab0f4bec3559b816d7a00) ([#8475](https://github.com/yt-dlp/yt-dlp/issues/8475)) by [bashonly](https://github.com/bashonly)
- **weibo**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/15b252dfd2c6807fe57afc5a95e59abadb32ccd2) ([#8463](https://github.com/yt-dlp/yt-dlp/issues/8463)) by [c-basalt](https://github.com/c-basalt)
- **weverse**: [Fix login error handling](https://github.com/yt-dlp/yt-dlp/commit/4a601c9eff9fb42e24a4c8da3fa03628e035b35b) ([#8458](https://github.com/yt-dlp/yt-dlp/issues/8458)) by [seproDev](https://github.com/seproDev)
- **youtube**: [Check newly uploaded iOS HLS formats](https://github.com/yt-dlp/yt-dlp/commit/ef79d20dc9d27ac002a7196f073b37f2f2721aed) ([#8336](https://github.com/yt-dlp/yt-dlp/issues/8336)) by [bashonly](https://github.com/bashonly)
- **zoom**: [Extract combined view formats](https://github.com/yt-dlp/yt-dlp/commit/3906de07551fedb00b789345bf24cc27d6ddf128) ([#7847](https://github.com/yt-dlp/yt-dlp/issues/7847)) by [Mipsters](https://github.com/Mipsters)

#### Downloader changes
- **aria2c**: [Remove duplicate `--file-allocation=none`](https://github.com/yt-dlp/yt-dlp/commit/21b25281c51523620706b11bfc1c4a889858e1f2) ([#8332](https://github.com/yt-dlp/yt-dlp/issues/8332)) by [CrendKing](https://github.com/CrendKing)
- **dash**: [Force native downloader for `--live-from-start`](https://github.com/yt-dlp/yt-dlp/commit/2622c804d1a5accc3045db398e0fc52074f4bdb3) ([#8339](https://github.com/yt-dlp/yt-dlp/issues/8339)) by [bashonly](https://github.com/bashonly)

#### Networking changes
- **Request Handler**: requests: [Add handler for `requests` HTTP library (#3668)](https://github.com/yt-dlp/yt-dlp/commit/8a8b54523addf46dfd50ef599761a81bc22362e6) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [Grub4K](https://github.com/Grub4K) (With fixes in [4e38e2a](https://github.com/yt-dlp/yt-dlp/commit/4e38e2ae9d7380015349e6aee59c78bb3938befd))

    Adds support for HTTPS proxies and persistent connections (keep-alive)
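
A minimal sketch of what the new handler enables through the existing `proxy` option; the proxy and video URLs are placeholders, and the optional `requests` package must be installed for this handler to be used:

```python
# With the `requests`-based handler, HTTPS proxies and keep-alive connections
# are supported through the standard `proxy` option.
from yt_dlp import YoutubeDL

ydl_opts = {'proxy': 'https://127.0.0.1:3128'}  # placeholder HTTPS proxy
with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://example.com/video'])  # placeholder URL
```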

#### Misc. changes
- **build**
    - [Include secretstorage in Linux builds](https://github.com/yt-dlp/yt-dlp/commit/9970d74c8383432c6c8779aa47d3253dcf412b14) by [bashonly](https://github.com/bashonly)
    - [Overhaul and unify release workflow](https://github.com/yt-dlp/yt-dlp/commit/1d03633c5a1621b9f3a756f0a4f9dc61fab3aeaa) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
- **ci**
    - [Bump `actions/checkout` to v4](https://github.com/yt-dlp/yt-dlp/commit/5438593a35b7b042fc48fe29cad0b9039f07c9bb) by [bashonly](https://github.com/bashonly)
    - [Run core tests with dependencies](https://github.com/yt-dlp/yt-dlp/commit/700444c23ddb65f618c2abd942acdc0c58c650b1) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)
- **cleanup**
    - [Fix changelog typo](https://github.com/yt-dlp/yt-dlp/commit/a9d3f4b20a3533d2a40104c85bc2cc6c2564c800) by [bashonly](https://github.com/bashonly)
    - [Update documentation for master and nightly channels](https://github.com/yt-dlp/yt-dlp/commit/a00af29853b8c7350ce086f4cab8c2c9cf2fcf1d) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)
    - Miscellaneous: [b012271](https://github.com/yt-dlp/yt-dlp/commit/b012271d01b59759e4eefeab0308698cd9e7224c) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [dirkf](https://github.com/dirkf), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K), [seproDev](https://github.com/seproDev)
- **test**: update: [Implement simple updater unit tests](https://github.com/yt-dlp/yt-dlp/commit/87264d4fdadcddd91289b968dd0e4bf58d449267) by [bashonly](https://github.com/bashonly)

### 2023.10.13

#### Core changes
- [Ensure thumbnail output directory exists](https://github.com/yt-dlp/yt-dlp/commit/2acd1d555ef89851c73773776715d3de9a0e30b9) ([#7985](https://github.com/yt-dlp/yt-dlp/issues/7985)) by [Riteo](https://github.com/Riteo)
- **utils**
    - `js_to_json`: [Fix `Date` constructor parsing](https://github.com/yt-dlp/yt-dlp/commit/9d7ded6419089c1bf252496073f73ad90ed71004) ([#8295](https://github.com/yt-dlp/yt-dlp/issues/8295)) by [awalgarg](https://github.com/awalgarg), [Grub4K](https://github.com/Grub4K)
    - `write_xattr`: [Use `os.setxattr` if available](https://github.com/yt-dlp/yt-dlp/commit/84e26038d4002e763ea51ca1bdce4f7e63c540bf) ([#8205](https://github.com/yt-dlp/yt-dlp/issues/8205)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K)

#### Extractor changes
- **artetv**: [Support age-restricted content](https://github.com/yt-dlp/yt-dlp/commit/09f815ad52843219a7ee3f2a0dddf6c250c91f0c) ([#8301](https://github.com/yt-dlp/yt-dlp/issues/8301)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
- **jtbc**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b286ec68f1f28798b3e371f888a2ed97d399cf77) ([#8314](https://github.com/yt-dlp/yt-dlp/issues/8314)) by [seproDev](https://github.com/seproDev)
- **mbn**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e030b6b6fba7b2f4614ad2ab9f7649d40a2dd305) ([#8312](https://github.com/yt-dlp/yt-dlp/issues/8312)) by [seproDev](https://github.com/seproDev)
- **nhk**: [Fix Japanese-language VOD extraction](https://github.com/yt-dlp/yt-dlp/commit/4de94b9e165bfd6421a692f5f2eabcdb08edcb71) ([#8309](https://github.com/yt-dlp/yt-dlp/issues/8309)) by [garret1317](https://github.com/garret1317)
- **radiko**: [Fix bug with `downloader_options`](https://github.com/yt-dlp/yt-dlp/commit/b9316642313bbc9e209ac0d2276d37ba60bceb49) by [bashonly](https://github.com/bashonly)
- **tenplay**: [Add support for seasons](https://github.com/yt-dlp/yt-dlp/commit/88a99c87b680ae59002534a517e191f46c42cbd4) ([#7939](https://github.com/yt-dlp/yt-dlp/issues/7939)) by [midnightveil](https://github.com/midnightveil)
- **youku**: [Improve tudou.com support](https://github.com/yt-dlp/yt-dlp/commit/b7098d46b552a9322c6cea39ba80be5229f922de) ([#8160](https://github.com/yt-dlp/yt-dlp/issues/8160)) by [naginatana](https://github.com/naginatana)
- **youtube**: [Fix bug with `--extractor-retries inf`](https://github.com/yt-dlp/yt-dlp/commit/feebf6d02fc9651331eee2af5e08e6112288163b) ([#8328](https://github.com/yt-dlp/yt-dlp/issues/8328)) by [Grub4K](https://github.com/Grub4K)

#### Downloader changes
- **fragment**: [Improve progress calculation](https://github.com/yt-dlp/yt-dlp/commit/1c51c520f7b511ebd9e4eb7322285a8c31eedbbd) ([#8241](https://github.com/yt-dlp/yt-dlp/issues/8241)) by [Grub4K](https://github.com/Grub4K)

#### Misc. changes
- **cleanup**: Miscellaneous: [b634ba7](https://github.com/yt-dlp/yt-dlp/commit/b634ba742d8f38ce9ecfa0546485728b0c6c59d1) by [bashonly](https://github.com/bashonly), [gamer191](https://github.com/gamer191)

### 2023.10.07

#### Extractor changes
- **abc.net.au**: iview: [Improve `episode` extraction](https://github.com/yt-dlp/yt-dlp/commit/a9efb4b8d74f3583450ffda0ee57259a47d39c70) ([#8201](https://github.com/yt-dlp/yt-dlp/issues/8201)) by [xofe](https://github.com/xofe)
- **erocast**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/47c598783c98c179e04dd12c2a3fee0f3dc53087) ([#8264](https://github.com/yt-dlp/yt-dlp/issues/8264)) by [madewokherd](https://github.com/madewokherd)
- **gofile**: [Fix token cookie bug](https://github.com/yt-dlp/yt-dlp/commit/0730d5a966fa8a937d84bfb7f68be5198acb039b) by [bashonly](https://github.com/bashonly)
- **iq.com**: [Fix extraction and subtitles](https://github.com/yt-dlp/yt-dlp/commit/35d9cbaf9638ccc9daf8a863063b2e7c135bc664) ([#8260](https://github.com/yt-dlp/yt-dlp/issues/8260)) by [AS6939](https://github.com/AS6939)
- **lbry**
    - [Add playlist support](https://github.com/yt-dlp/yt-dlp/commit/48cceec1ddb8649b5e771df8df79eb9c39c82b90) ([#8213](https://github.com/yt-dlp/yt-dlp/issues/8213)) by [bashonly](https://github.com/bashonly), [drzraf](https://github.com/drzraf), [Grub4K](https://github.com/Grub4K)
    - [Extract `uploader_id`](https://github.com/yt-dlp/yt-dlp/commit/0e722f2f3ca42e634fd7b06ee70b16bf833ce132) ([#8244](https://github.com/yt-dlp/yt-dlp/issues/8244)) by [drzraf](https://github.com/drzraf)
- **litv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/91a670a4f7babe9c8aa2018f57d8c8952a6f49d8) ([#7785](https://github.com/yt-dlp/yt-dlp/issues/7785)) by [jiru](https://github.com/jiru)
- **neteasemusic**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/f980df734cf5c0eaded2f7b38c6c60bccfeebb48) ([#8181](https://github.com/yt-dlp/yt-dlp/issues/8181)) by [c-basalt](https://github.com/c-basalt)
- **nhk**: [Fix VOD extraction](https://github.com/yt-dlp/yt-dlp/commit/e831c80e8b2fc025b3b67d82974cc59e3526fdc8) ([#8249](https://github.com/yt-dlp/yt-dlp/issues/8249)) by [garret1317](https://github.com/garret1317)
- **radiko**: [Improve extraction](https://github.com/yt-dlp/yt-dlp/commit/2ad3873f0dfa9285c91d2160e36c039e69d597c7) ([#8221](https://github.com/yt-dlp/yt-dlp/issues/8221)) by [garret1317](https://github.com/garret1317)
- **substack**
    - [Fix download cookies bug](https://github.com/yt-dlp/yt-dlp/commit/2f2dda3a7e85148773da3cdbc03ac9949ec1bc45) ([#8219](https://github.com/yt-dlp/yt-dlp/issues/8219)) by [handlerug](https://github.com/handlerug)
    - [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/fbcc299bd8a19cf8b3c8805d6c268a9110230973) ([#8218](https://github.com/yt-dlp/yt-dlp/issues/8218)) by [handlerug](https://github.com/handlerug)
- **theta**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/792f1e64f6a2beac51e85408d142b3118115c4fd) ([#8251](https://github.com/yt-dlp/yt-dlp/issues/8251)) by [alerikaisattera](https://github.com/alerikaisattera)
- **wrestleuniversevod**: [Call API with device ID](https://github.com/yt-dlp/yt-dlp/commit/b095fd3fa9d58a65dc9b830bd63b9d909422aa86) ([#8272](https://github.com/yt-dlp/yt-dlp/issues/8272)) by [bashonly](https://github.com/bashonly)
- **xhamster**: user: [Support creator urls](https://github.com/yt-dlp/yt-dlp/commit/cc8d8441524ec3442d7c0d3f8f33f15b66aa06f3) ([#8232](https://github.com/yt-dlp/yt-dlp/issues/8232)) by [Grub4K](https://github.com/Grub4K)
- **youtube**
    - [Fix `heatmap` extraction](https://github.com/yt-dlp/yt-dlp/commit/03e85ea99db76a2fddb65bf46f8819bda780aaf3) ([#8299](https://github.com/yt-dlp/yt-dlp/issues/8299)) by [bashonly](https://github.com/bashonly)
    - [Raise a warning for `Incomplete Data` instead of an error](https://github.com/yt-dlp/yt-dlp/commit/eb5bdbfa70126c7d5355cc0954b63720522e462c) ([#8238](https://github.com/yt-dlp/yt-dlp/issues/8238)) by [coletdjnz](https://github.com/coletdjnz)

#### Misc. changes
- **cleanup**
    - [Update extractor tests](https://github.com/yt-dlp/yt-dlp/commit/19c90e405b4137c06dfe6f9aaa02396df0da93e5) ([#7718](https://github.com/yt-dlp/yt-dlp/issues/7718)) by [trainman261](https://github.com/trainman261)
    - Miscellaneous: [377e85a](https://github.com/yt-dlp/yt-dlp/commit/377e85a1797db9e98b78b38203ed9d4ded229991) by [dirkf](https://github.com/dirkf), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K)

### 2023.09.24

#### Important changes
- **The minimum *recommended* Python version has been raised to 3.8**
Since Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)
- Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)
    - The shell escape function is now using `""` instead of `\"`.
    - `utils.Popen` has been patched to properly quote commands.
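
A minimal sketch of the quoting behavior this advisory concerns, using the Python-API counterpart of `--exec`; the URL is a placeholder, and the postprocessor wiring shown is an assumption rather than the advisory's own example:

```python
# `%(title)q` asks yt-dlp to shell-quote the field before substitution; the
# fix ensures a crafted title cannot break out of that quoting on Windows.
from yt_dlp import YoutubeDL

ydl_opts = {
    'postprocessors': [{
        'key': 'Exec',                 # Python-API counterpart of --exec
        'exec_cmd': 'echo %(title)q',  # field is shell-quoted via %q
        'when': 'after_move',
    }],
}
with YoutubeDL(ydl_opts) as ydl:
    ydl.download(['https://example.com/video'])  # placeholder URL
```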
#### Core changes
- [Fix HTTP headers and cookie handling](https://github.com/yt-dlp/yt-dlp/commit/6c5211cebeacfc53ad5d5ddf4a659be76039656f) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
- [Fix `--check-formats`](https://github.com/yt-dlp/yt-dlp/commit/8cb7fc44db010e965d808ee679ef0725cb6e147c) by [pukkandan](https://github.com/pukkandan)
- [Fix support for upcoming Python 3.12](https://github.com/yt-dlp/yt-dlp/commit/836e06d246512f286f30c1371b2c54b72c9ecd93) ([#8130](https://github.com/yt-dlp/yt-dlp/issues/8130)) by [Grub4K](https://github.com/Grub4K)
- [Merged with youtube-dl 66ab08](https://github.com/yt-dlp/yt-dlp/commit/9d6254069c75877bc88bc3584f4326fb1853a543) by [coletdjnz](https://github.com/coletdjnz)
- [Prevent RCE when using `--exec` with `%q` (CVE-2023-40581)](https://github.com/yt-dlp/yt-dlp/commit/de015e930747165dbb8fcd360f8775fd973b7d6e) by [Grub4K](https://github.com/Grub4K)
- [Raise minimum recommended Python version to 3.8](https://github.com/yt-dlp/yt-dlp/commit/61bdf15fc7400601c3da1aa7a43917310a5bf391) ([#8183](https://github.com/yt-dlp/yt-dlp/issues/8183)) by [Grub4K](https://github.com/Grub4K)
- [`FFmpegFixupM3u8PP` may need to run with ffmpeg](https://github.com/yt-dlp/yt-dlp/commit/f73c11803579889dc8e1c99e25dba9a22fef39d8) by [pukkandan](https://github.com/pukkandan)
- **compat**
    - [Add `types.NoneType`](https://github.com/yt-dlp/yt-dlp/commit/e0c4db04dc82a699bdabd9821ddc239ebe17d30a) by [pukkandan](https://github.com/pukkandan) (With fixes in [25b6e8f](https://github.com/yt-dlp/yt-dlp/commit/25b6e8f94679b4458550702b46e61249b875a4fd))
    - [Deprecate old functions](https://github.com/yt-dlp/yt-dlp/commit/3d2623a898196640f7cc0fc8b70118ff19e6925d) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
    - [Ensure submodules are imported correctly](https://github.com/yt-dlp/yt-dlp/commit/a250b247334ce9f641e709cbb64974da6034a2b3) by [pukkandan](https://github.com/pukkandan)
- **cookies**: [Containers JSON should be opened as utf-8](https://github.com/yt-dlp/yt-dlp/commit/dab87ca23650fd87184ff5286b53e6985b59f71d) ([#7800](https://github.com/yt-dlp/yt-dlp/issues/7800)) by [bashonly](https://github.com/bashonly)
- **dependencies**: [Handle deprecation of `sqlite3.version`](https://github.com/yt-dlp/yt-dlp/commit/35f9a306e6934793cff100200cd03f288ec33f11) ([#8167](https://github.com/yt-dlp/yt-dlp/issues/8167)) by [bashonly](https://github.com/bashonly)
- **outtmpl**: [Fix replacement for `playlist_index`](https://github.com/yt-dlp/yt-dlp/commit/a264433c9fba147ecae2420091614186cfeeb895) by [pukkandan](https://github.com/pukkandan)
- **utils**
    - [Add temporary shim for logging](https://github.com/yt-dlp/yt-dlp/commit/1b392f905d20ef1f1b300b180f867d43c9ce49b8) by [pukkandan](https://github.com/pukkandan)
    - [Improve `parse_duration`](https://github.com/yt-dlp/yt-dlp/commit/af86873218c24c3859ccf575a87f2b00a73b49d0) by [bashonly](https://github.com/bashonly)
    - HTTPHeaderDict: [Handle byte values](https://github.com/yt-dlp/yt-dlp/commit/3f7965105d8d2048359e67c1e8b8ebd51588143b) by [pukkandan](https://github.com/pukkandan)
    - `clean_podcast_url`: [Handle more trackers](https://github.com/yt-dlp/yt-dlp/commit/2af4eeb77246b8183aae75a0a8d19f18c08115b2) ([#7556](https://github.com/yt-dlp/yt-dlp/issues/7556)) by [bashonly](https://github.com/bashonly), [mabdelfattah](https://github.com/mabdelfattah)
    - `js_to_json`: [Handle `Array` objects](https://github.com/yt-dlp/yt-dlp/commit/52414d64ca7b92d3f83964cdd68247989b0c4625) by [Grub4K](https://github.com/Grub4K), [std-move](https://github.com/std-move) (see the sketch after this list)
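Several of the `utils` helpers above are part of yt-dlp's importable utility surface. For context, a rough usage sketch of `js_to_json`; the input string is made up, and the exact `Array`-object handling is per the linked commit:

```python
import json

from yt_dlp.utils import js_to_json

# JS-style config as it might appear inline in a page: unquoted keys,
# single quotes and comments are not valid JSON, but js_to_json
# normalizes them so json.loads() can parse the result.
raw = "{title: 'Example', /* inline comment */ id: 42}"
data = json.loads(js_to_json(raw))
print(data)  # {'title': 'Example', 'id': 42}
```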
#### Extractor changes
- [Extract subtitles from SMIL manifests](https://github.com/yt-dlp/yt-dlp/commit/550e65410a7a1b105923494ac44460a4dc1a15d9) ([#7667](https://github.com/yt-dlp/yt-dlp/issues/7667)) by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
- [Fix `--load-pages`](https://github.com/yt-dlp/yt-dlp/commit/81b4712bca608b9015aa68a4d96661d56e9cb894) by [pukkandan](https://github.com/pukkandan)
- [Make `_search_nuxt_data` more lenient](https://github.com/yt-dlp/yt-dlp/commit/904a19ee93195ce0bd4b08bd22b186120afb5b17) by [std-move](https://github.com/std-move)
- **abematv**
    - [Fix proxy handling](https://github.com/yt-dlp/yt-dlp/commit/497bbbbd7328cb705f70eced94dbd90993819a46) ([#8046](https://github.com/yt-dlp/yt-dlp/issues/8046)) by [SevenLives](https://github.com/SevenLives)
    - [Temporary fix for protocol handler](https://github.com/yt-dlp/yt-dlp/commit/9f66247289b9f8ecf931833b3f5f127274dd2161) by [pukkandan](https://github.com/pukkandan)
- **amazonminitv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/538d37671a17e0782d17f08df17800e2e3bd57c8) by [bashonly](https://github.com/bashonly), [GautamMKGarg](https://github.com/GautamMKGarg)
- **antenna**: [Support antenna.gr](https://github.com/yt-dlp/yt-dlp/commit/665876034c8d3c031443f6b4958bed02ccdf4164) ([#7584](https://github.com/yt-dlp/yt-dlp/issues/7584)) by [stdedos](https://github.com/stdedos)
- **artetv**: [Fix HLS formats extraction](https://github.com/yt-dlp/yt-dlp/commit/c2da0b5ea215298135f76e3dc14b972a3c4afacb) by [bashonly](https://github.com/bashonly)
- **axs**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/aee6b9b88c0bcccf27fd23b7e00fc0b7b168928f) ([#8094](https://github.com/yt-dlp/yt-dlp/issues/8094)) by [barsnick](https://github.com/barsnick)
- **banbye**: [Support video ids containing a hyphen](https://github.com/yt-dlp/yt-dlp/commit/578a82e497502b951036ce9da6fe0dac6937ac27) ([#8059](https://github.com/yt-dlp/yt-dlp/issues/8059)) by [kshitiz305](https://github.com/kshitiz305)
- **bbc**: [Extract tracklist as chapters](https://github.com/yt-dlp/yt-dlp/commit/eda0e415d26eb084e570cf5372d38ee1f616b70f) ([#7788](https://github.com/yt-dlp/yt-dlp/issues/7788)) by [garret1317](https://github.com/garret1317)
- **bild.de**: [Extract HLS formats](https://github.com/yt-dlp/yt-dlp/commit/b4c1c408c63724339eb12b16c91b253a7ee62cfa) ([#8032](https://github.com/yt-dlp/yt-dlp/issues/8032)) by [barsnick](https://github.com/barsnick)
- **bilibili**
    - [Add support for series, favorites and watch later](https://github.com/yt-dlp/yt-dlp/commit/9e68747f9607f05e92bb7d9b6e79d678b50070e1) ([#7518](https://github.com/yt-dlp/yt-dlp/issues/7518)) by [c-basalt](https://github.com/c-basalt)
    - [Extract Dolby audio formats](https://github.com/yt-dlp/yt-dlp/commit/b84fda7388dd20d38921e23b469147f3957c1812) ([#8142](https://github.com/yt-dlp/yt-dlp/issues/8142)) by [ClosedPort22](https://github.com/ClosedPort22)
    - [Extract `format_id`](https://github.com/yt-dlp/yt-dlp/commit/5336bf57a7061e0955a37f0542fc8ebf50d55b17) ([#7555](https://github.com/yt-dlp/yt-dlp/issues/7555)) by [c-basalt](https://github.com/c-basalt)
- **bilibilibangumi**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/bdd0b75e3f41ff35440eda6d395008beef19ef2f) ([#7337](https://github.com/yt-dlp/yt-dlp/issues/7337)) by [GD-Slime](https://github.com/GD-Slime)
- **bpb**: [Overhaul extractor](https://github.com/yt-dlp/yt-dlp/commit/f659e6439444ac64305b5c80688cd82f59d2279c) ([#8119](https://github.com/yt-dlp/yt-dlp/issues/8119)) by [Grub4K](https://github.com/Grub4K)
- **brilliantpala**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/92feb5654c5a4c81ba872904a618700fcbb3e546) ([#6680](https://github.com/yt-dlp/yt-dlp/issues/6680)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **canal1, caracoltvplay**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b3febedbeb662dfdf9b5c1d5799039ad4fc969de) ([#7151](https://github.com/yt-dlp/yt-dlp/issues/7151)) by [elyse0](https://github.com/elyse0)
- **cbc**: [Ignore any 426 from API](https://github.com/yt-dlp/yt-dlp/commit/9bf14be775289bd88cc1f5c89fd761ae51879484) ([#7689](https://github.com/yt-dlp/yt-dlp/issues/7689)) by [makew0rld](https://github.com/makew0rld)
- **cbcplayer**: [Extract HLS formats and subtitles](https://github.com/yt-dlp/yt-dlp/commit/339c339fec095ff4141b20e6aa83629117fb26df) ([#7484](https://github.com/yt-dlp/yt-dlp/issues/7484)) by [trainman261](https://github.com/trainman261)
- **cbcplayerplaylist**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ed711897814f3ee0b1822e4205e74133467e8f1c) ([#7870](https://github.com/yt-dlp/yt-dlp/issues/7870)) by [trainman261](https://github.com/trainman261)
- **cineverse**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/15591940ff102d1ae337d603a46d8f238c83a61f) ([#8146](https://github.com/yt-dlp/yt-dlp/issues/8146)) by [garret1317](https://github.com/garret1317)
- **crunchyroll**: [Remove initial state extraction](https://github.com/yt-dlp/yt-dlp/commit/9b16762f48914de9ac914601769c76668e433325) ([#7632](https://github.com/yt-dlp/yt-dlp/issues/7632)) by [Grub4K](https://github.com/Grub4K)
- **douyutv**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/21f40e75dfc0055ea9cdbd7fe2c46c6f9b561afd) ([#7652](https://github.com/yt-dlp/yt-dlp/issues/7652)) by [c-basalt](https://github.com/c-basalt)
- **dropbox**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/b9f2bc2dbed2323734a0d18e65e1e2e23dc833d8) ([#7926](https://github.com/yt-dlp/yt-dlp/issues/7926)) by [bashonly](https://github.com/bashonly), [denhotte](https://github.com/denhotte), [nathantouze](https://github.com/nathantouze) (With fixes in [099fb1b](https://github.com/yt-dlp/yt-dlp/commit/099fb1b35cf835303306549f5113d1802d79c9c7) by [bashonly](https://github.com/bashonly))
- **eplus**: inbound: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/295fbb3ae3a7d0dd50e286be5c487cf145ed5778) ([#5782](https://github.com/yt-dlp/yt-dlp/issues/5782)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **expressen**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/a5e264d74b4bd60c6e7ec4e38f1a23af4e420531) ([#8153](https://github.com/yt-dlp/yt-dlp/issues/8153)) by [kylegustavo](https://github.com/kylegustavo)
- **facebook**
    - [Add dash manifest URL](https://github.com/yt-dlp/yt-dlp/commit/a854fbec56d5004f5147116a41d1dd050632a579) ([#7743](https://github.com/yt-dlp/yt-dlp/issues/7743)) by [ringus1](https://github.com/ringus1)
    - [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/d3d81cc98f554d0adb87d24bfd6fabaaa803944d) ([#7890](https://github.com/yt-dlp/yt-dlp/issues/7890)) by [ringus1](https://github.com/ringus1)
    - [Improve format sorting](https://github.com/yt-dlp/yt-dlp/commit/308936619c8a4f3a52d73c829c2006ff6c55fea2) ([#8074](https://github.com/yt-dlp/yt-dlp/issues/8074)) by [fireattack](https://github.com/fireattack)
    - reel: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bb5d84c9d2f1e978c3eddfb5ccbe138036682a36) ([#7564](https://github.com/yt-dlp/yt-dlp/issues/7564)) by [bashonly](https://github.com/bashonly), [demon071](https://github.com/demon071)
- **fox**: [Support foxsports.com](https://github.com/yt-dlp/yt-dlp/commit/30b29f37159e9226e2f2d5434c9a4096ac4efa2e) ([#7724](https://github.com/yt-dlp/yt-dlp/issues/7724)) by [ischmidt20](https://github.com/ischmidt20)
- **funker530**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/0ce1f48bf1cb78d40d734ce73ee1c90eccf92274) ([#8040](https://github.com/yt-dlp/yt-dlp/issues/8040)) by [04-pasha-04](https://github.com/04-pasha-04)
- **generic**
    - [Fix KVS thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/53675852195d8dd859555d4789944a6887171ff8) by [bashonly](https://github.com/bashonly)
    - [Fix generic title for embeds](https://github.com/yt-dlp/yt-dlp/commit/994f7ef8e6003f4b7b258528755d0b6adcc31714) by [pukkandan](https://github.com/pukkandan)
- **gofile**: [Update token](https://github.com/yt-dlp/yt-dlp/commit/99c99c7185f5d8e9b3699a6fc7f86ec663d7b97e) by [bashonly](https://github.com/bashonly)
- **hotstar**
    - [Extract `release_year`](https://github.com/yt-dlp/yt-dlp/commit/7237c8dca0590aa7438ade93f927df88c9381ec7) ([#7869](https://github.com/yt-dlp/yt-dlp/issues/7869)) by [Rajeshwaran2001](https://github.com/Rajeshwaran2001)
    - [Make metadata extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/30ea88591b728cca0896018dbf67c2298070c669) by [bashonly](https://github.com/bashonly)
    - [Support `/clips/` URLs](https://github.com/yt-dlp/yt-dlp/commit/86eeb044c2342d68c6ef177577f87852e6badd85) ([#7710](https://github.com/yt-dlp/yt-dlp/issues/7710)) by [bashonly](https://github.com/bashonly)
- **hungama**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/4b3a6ef1b3e235ba9a45142830b6edb357c71696) ([#7757](https://github.com/yt-dlp/yt-dlp/issues/7757)) by [bashonly](https://github.com/bashonly), [Yalab7](https://github.com/Yalab7)
- **indavideoembed**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/63e0c5748c0eb461a2ccca4181616eb930b4b750) ([#8129](https://github.com/yt-dlp/yt-dlp/issues/8129)) by [aky-01](https://github.com/aky-01)
- **iprima**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/568f08051841aedea968258889539741e26009e9) ([#7216](https://github.com/yt-dlp/yt-dlp/issues/7216)) by [std-move](https://github.com/std-move)
- **lbry**: [Fix original format extraction](https://github.com/yt-dlp/yt-dlp/commit/127a22460658ac39cbe5c4b3fb88d578363e0dfa) ([#7711](https://github.com/yt-dlp/yt-dlp/issues/7711)) by [bashonly](https://github.com/bashonly)
- **lecturio**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/efa2339502a37cf13ae7f143bd8b2c28f452d1cd) ([#7649](https://github.com/yt-dlp/yt-dlp/issues/7649)) by [simon300000](https://github.com/simon300000)
- **magellantv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f4ea501551526ebcb54d19b84cf0ebe798583a85) ([#7616](https://github.com/yt-dlp/yt-dlp/issues/7616)) by [bashonly](https://github.com/bashonly)
- **massengeschmack.tv**: [Fix title extraction](https://github.com/yt-dlp/yt-dlp/commit/81f46ac573dc443ad48560f308582a26784d3015) ([#7813](https://github.com/yt-dlp/yt-dlp/issues/7813)) by [sb0stn](https://github.com/sb0stn)
- **media.ccc.de**: lists: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/cf11b40ac40e3d23a6352753296f3a732886efb9) ([#8144](https://github.com/yt-dlp/yt-dlp/issues/8144)) by [Rohxn16](https://github.com/Rohxn16)
- **mediaite**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/630a55df8de7747e79aa680959d785dfff2c4b76) ([#7923](https://github.com/yt-dlp/yt-dlp/issues/7923)) by [Grabien](https://github.com/Grabien)
- **mediaklikk**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6e07e4bc7e59f5bdb60e93c011e57b18b009f2b5) ([#8086](https://github.com/yt-dlp/yt-dlp/issues/8086)) by [bashonly](https://github.com/bashonly), [zhallgato](https://github.com/zhallgato)
- **mediastream**: [Make embed extraction non-fatal](https://github.com/yt-dlp/yt-dlp/commit/635ae31f68a3ac7f6393d59657ed711e34ee3552) by [bashonly](https://github.com/bashonly)
- **mixcloud**: [Update API URL](https://github.com/yt-dlp/yt-dlp/commit/7b71643cc986de9a3768dac4ac9b64f4d05e7f5e) ([#8114](https://github.com/yt-dlp/yt-dlp/issues/8114)) by [garret1317](https://github.com/garret1317)
- **monstercat**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/eaee21bf71889d495076037cbe590c8c0b21ef3a) ([#8133](https://github.com/yt-dlp/yt-dlp/issues/8133)) by [garret1317](https://github.com/garret1317)
- **motortrendondemand**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/c03a58ec9933e4a42c2d8fa80b8a0ddb2cde64e6) ([#7683](https://github.com/yt-dlp/yt-dlp/issues/7683)) by [AmirAflak](https://github.com/AmirAflak)
- **museai**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/65cfa2b057d7946fbe322155a778fe206556d0c6) ([#7614](https://github.com/yt-dlp/yt-dlp/issues/7614)) by [bashonly](https://github.com/bashonly)
- **mzaalo**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/d7aee8e310b2c4f21d50aac0b420e1b3abde21a4) by [bashonly](https://github.com/bashonly)
- **n1info**: article: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/8ac5b6d96ae5c60cd5ae2495949e0068a6754c45) ([#7373](https://github.com/yt-dlp/yt-dlp/issues/7373)) by [u-spec-png](https://github.com/u-spec-png)
- **nfl.com**: plus, replay: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/1eaca74bc2ca0f5b1ec532f24c61de44f2e8cb2d) ([#7838](https://github.com/yt-dlp/yt-dlp/issues/7838)) by [bashonly](https://github.com/bashonly)
- **niconicochannelplus**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/698beb9a497f51693e64d167e572ff9efa4bc25f) ([#5686](https://github.com/yt-dlp/yt-dlp/issues/5686)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **nitter**: [Fix title extraction fallback](https://github.com/yt-dlp/yt-dlp/commit/a83da3717d30697102e76f63a6f29d77f9373c2a) ([#8102](https://github.com/yt-dlp/yt-dlp/issues/8102)) by [ApoorvShah111](https://github.com/ApoorvShah111)
- **noodlemagazine**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/bae4834245a708fff97219849ec880c319c88bc6) ([#7830](https://github.com/yt-dlp/yt-dlp/issues/7830)) by [RedDeffender](https://github.com/RedDeffender) (With fixes in [69dbfe0](https://github.com/yt-dlp/yt-dlp/commit/69dbfe01c47cd078682a87f179f5846e2679e927) by [bashonly](https://github.com/bashonly))
- **novaembed**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2269065ad60cb0ab62408ae6a7b20283e5252232) ([#7910](https://github.com/yt-dlp/yt-dlp/issues/7910)) by [std-move](https://github.com/std-move)
- **patreoncampaign**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/11de6fec9c9b8d34d1f90c8e6218ec58a3471b58) ([#7664](https://github.com/yt-dlp/yt-dlp/issues/7664)) by [bashonly](https://github.com/bashonly)
- **pbs**: [Add extractor `PBSKidsIE`](https://github.com/yt-dlp/yt-dlp/commit/6d6081dda1290a85bdab6717f239289e3aa74c8e) ([#7602](https://github.com/yt-dlp/yt-dlp/issues/7602)) by [snixon](https://github.com/snixon)
- **piapro**: [Support `/content` URL](https://github.com/yt-dlp/yt-dlp/commit/1bcb9fe8715b1f288efc322be3de409ee0597080) ([#7592](https://github.com/yt-dlp/yt-dlp/issues/7592)) by [FinnRG](https://github.com/FinnRG)
- **piaulizaportal**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6636021206dad17c7745ae6bce6cb73d6f2ef319) ([#7903](https://github.com/yt-dlp/yt-dlp/issues/7903)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **picartovod**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/db9743894071760f994f640a4c24358f749a78c0) ([#7727](https://github.com/yt-dlp/yt-dlp/issues/7727)) by [Frankgoji](https://github.com/Frankgoji)
- **pornbox**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/40999467f72db074a3f13057da9bf82a857530fe) ([#7386](https://github.com/yt-dlp/yt-dlp/issues/7386)) by [niemands](https://github.com/niemands)
- **pornhub**: [Update access cookies for UK](https://github.com/yt-dlp/yt-dlp/commit/1d3d579c2142f69831b6ae140e1d8e824e07fa0e) ([#7591](https://github.com/yt-dlp/yt-dlp/issues/7591)) by [zhong-yiyu](https://github.com/zhong-yiyu)
- **pr0gramm**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/b532556d0a85e7d76f8f0880861232fb706ddbc5) ([#8151](https://github.com/yt-dlp/yt-dlp/issues/8151)) by [Grub4K](https://github.com/Grub4K)
- **radiofrance**: [Add support for livestreams, podcasts, playlists](https://github.com/yt-dlp/yt-dlp/commit/ba8e9eb2c8bbb699f314169fab8e544437ad731e) ([#7006](https://github.com/yt-dlp/yt-dlp/issues/7006)) by [elyse0](https://github.com/elyse0)
- **rbgtum**: [Fix extraction and support new URL format](https://github.com/yt-dlp/yt-dlp/commit/5fccabac27ca3c1165ade1b0df6fbadc24258dc2) ([#7690](https://github.com/yt-dlp/yt-dlp/issues/7690)) by [simon300000](https://github.com/simon300000)
- **reddit**
    - [Extract subtitles](https://github.com/yt-dlp/yt-dlp/commit/20c3c9b433dd47faf0dbde6b46e4e34eb76109a5) by [bashonly](https://github.com/bashonly)
    - [Fix thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/9a04113dfbb69b904e4e2bea736da293505786b8) by [bashonly](https://github.com/bashonly)
- **rtvslo**: [Fix format extraction](https://github.com/yt-dlp/yt-dlp/commit/94389b225d9bcf29aa7ba8afaf1bbd7c62204eae) ([#8131](https://github.com/yt-dlp/yt-dlp/issues/8131)) by [bashonly](https://github.com/bashonly)
- **rule34video**: [Extract tags](https://github.com/yt-dlp/yt-dlp/commit/58493923e9b6f774947a2131e5258e9f3cf816be) ([#7117](https://github.com/yt-dlp/yt-dlp/issues/7117)) by [soundchaser128](https://github.com/soundchaser128)
- **rumble**: [Fix embed extraction](https://github.com/yt-dlp/yt-dlp/commit/23d829a3420450bcfb0788e6fb2cf4f6acdbe596) ([#8035](https://github.com/yt-dlp/yt-dlp/issues/8035)) by [trislee](https://github.com/trislee)
- **s4c**
    - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b9de629d78ce31699f2de886071dc257830f9676) ([#7730](https://github.com/yt-dlp/yt-dlp/issues/7730)) by [ifan-t](https://github.com/ifan-t)
    - [Add series support and extract subs/thumbs](https://github.com/yt-dlp/yt-dlp/commit/fe371dcf0ba5ce8d42480eade54eeeac99ab3cb0) ([#7776](https://github.com/yt-dlp/yt-dlp/issues/7776)) by [ifan-t](https://github.com/ifan-t)
- **sohu**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/5be7e978867b5f66ad6786c674d79d40e950ae16) ([#7628](https://github.com/yt-dlp/yt-dlp/issues/7628)) by [bashonly](https://github.com/bashonly), [c-basalt](https://github.com/c-basalt)
- **stageplus**: [Fix m3u8 extraction](https://github.com/yt-dlp/yt-dlp/commit/56b3dc03354b75be995759d8441d2754c0442b9a) ([#7929](https://github.com/yt-dlp/yt-dlp/issues/7929)) by [bashonly](https://github.com/bashonly)
- **streamanity**: [Remove](https://github.com/yt-dlp/yt-dlp/commit/2cfe221fbbe46faa3f46552c08d947a51f424903) ([#7571](https://github.com/yt-dlp/yt-dlp/issues/7571)) by [alerikaisattera](https://github.com/alerikaisattera)
- **svtplay**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/2301b5c1b77a65abbb46b72f91e1e4666fd5d985) ([#7789](https://github.com/yt-dlp/yt-dlp/issues/7789)) by [dirkf](https://github.com/dirkf), [wader](https://github.com/wader)
- **tbsjp**: [Add episode, program, playlist extractors](https://github.com/yt-dlp/yt-dlp/commit/876b70c8edf4c0147f180bd981fbc4d625cbfb9c) ([#7765](https://github.com/yt-dlp/yt-dlp/issues/7765)) by [garret1317](https://github.com/garret1317)
- **tiktok**
    - [Fix audio-only format extraction](https://github.com/yt-dlp/yt-dlp/commit/b09bd0c19648f60c59fb980cd454cb0069959fb9) ([#7712](https://github.com/yt-dlp/yt-dlp/issues/7712)) by [bashonly](https://github.com/bashonly)
    - [Fix webpage extraction](https://github.com/yt-dlp/yt-dlp/commit/069cbece9dba6384f1cc5fcfc7ce562a31af42fc) by [bashonly](https://github.com/bashonly)
- **triller**: [Fix unlisted video extraction](https://github.com/yt-dlp/yt-dlp/commit/39837ae3199aa934299badbd0d63243ed639e6c8) ([#7670](https://github.com/yt-dlp/yt-dlp/issues/7670)) by [bashonly](https://github.com/bashonly)
- **tv5mondeplus**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/7d3d658f4c558ee7d72b1c01b46f2126948681cd) ([#7952](https://github.com/yt-dlp/yt-dlp/issues/7952)) by [dirkf](https://github.com/dirkf), [korli](https://github.com/korli)
- **twitcasting**
    - [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/cebbd33b1c678149fc8f0e254db6fc0da317ea80) ([#8120](https://github.com/yt-dlp/yt-dlp/issues/8120)) by [c-basalt](https://github.com/c-basalt)
    - [Support `--wait-for-video`](https://github.com/yt-dlp/yt-dlp/commit/c1d71d0d9f41db5e4306c86af232f5f6220a130b) ([#7975](https://github.com/yt-dlp/yt-dlp/issues/7975)) by [at-wat](https://github.com/at-wat)
- **twitter**
    - [Add fallback, improve error handling](https://github.com/yt-dlp/yt-dlp/commit/6014355c6142f68e20c8374e3787e5b5820f19e2) ([#7621](https://github.com/yt-dlp/yt-dlp/issues/7621)) by [bashonly](https://github.com/bashonly)
    - [Fix GraphQL and legacy API](https://github.com/yt-dlp/yt-dlp/commit/92315c03774cfabb3a921884326beb4b981f786b) ([#7516](https://github.com/yt-dlp/yt-dlp/issues/7516)) by [bashonly](https://github.com/bashonly)
    - [Fix retweet extraction and syndication API](https://github.com/yt-dlp/yt-dlp/commit/a006ce2b27357c15792eb5c18f06765e640b801c) ([#8016](https://github.com/yt-dlp/yt-dlp/issues/8016)) by [bashonly](https://github.com/bashonly)
    - [Revert 92315c03774cfabb3a921884326beb4b981f786b](https://github.com/yt-dlp/yt-dlp/commit/b03fa7834579a01cc5fba48c0e73488a16683d48) by [pukkandan](https://github.com/pukkandan)
    - spaces
        - [Fix format protocol](https://github.com/yt-dlp/yt-dlp/commit/613dbce177d34ffc31053e8e01acf4bb107bcd1e) ([#7550](https://github.com/yt-dlp/yt-dlp/issues/7550)) by [bashonly](https://github.com/bashonly)
        - [Pass referer header to downloader](https://github.com/yt-dlp/yt-dlp/commit/c6ef553792ed48462f9fd0e78143bef6b1a71c2e) by [bashonly](https://github.com/bashonly)
- **unsupported**: [List more sites with DRM](https://github.com/yt-dlp/yt-dlp/commit/e7057383380d7d53815f8feaf90ca3dcbde88983) by [pukkandan](https://github.com/pukkandan)
- **videa**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/98eac0e6ba0e510ae7dfdfd249d42ee71fb272b1) ([#8003](https://github.com/yt-dlp/yt-dlp/issues/8003)) by [aky-01](https://github.com/aky-01), [hatsomatt](https://github.com/hatsomatt)
- **vrt**: [Update token signing key](https://github.com/yt-dlp/yt-dlp/commit/325191d0c9bf3fe257b8a7c2eb95080f44f6ddfc) ([#7519](https://github.com/yt-dlp/yt-dlp/issues/7519)) by [Zprokkel](https://github.com/Zprokkel)
- **wat.tv**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/7cccab79e7d00ed965b48b8cefce1da8a0513409) ([#7898](https://github.com/yt-dlp/yt-dlp/issues/7898)) by [davinkevin](https://github.com/davinkevin)
- **wdr**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/5d0395498d7065aa5e55bac85fa9354b4b0d48eb) ([#7979](https://github.com/yt-dlp/yt-dlp/issues/7979)) by [szabyg](https://github.com/szabyg)
- **web.archive**: vlive: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/9652bca1bd02f6bc1b8cb1e186f2ccbf32225561) ([#8132](https://github.com/yt-dlp/yt-dlp/issues/8132)) by [bashonly](https://github.com/bashonly)
- **weibo**: [Fix extractor and support user extraction](https://github.com/yt-dlp/yt-dlp/commit/69b03f84f8378b0b5a2fbae56f9b7d860b2f529e) ([#7657](https://github.com/yt-dlp/yt-dlp/issues/7657)) by [c-basalt](https://github.com/c-basalt)
- **weverse**: [Support extraction without auth](https://github.com/yt-dlp/yt-dlp/commit/c2d8ee0000302aba63476b7d5bd8793e57b6c8c6) ([#7924](https://github.com/yt-dlp/yt-dlp/issues/7924)) by [seproDev](https://github.com/seproDev)
- **wimbledon**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/a15fcd299e767a510debd8dc1646fe863b96ce0e) ([#7551](https://github.com/yt-dlp/yt-dlp/issues/7551)) by [nnoboa](https://github.com/nnoboa)
- **wrestleuniverseppv**: [Fix HLS AES key extraction](https://github.com/yt-dlp/yt-dlp/commit/dae349da97cafe7357106a8f3187fd48a2ad1210) by [bashonly](https://github.com/bashonly)
- **youtube**
    - [Add `player_params` extractor arg](https://github.com/yt-dlp/yt-dlp/commit/ba06d77a316650ff057347d224b5afa8b203ad65) ([#7719](https://github.com/yt-dlp/yt-dlp/issues/7719)) by [coletdjnz](https://github.com/coletdjnz) (see the sketch after this list)
    - [Fix `player_params` arg being converted to lowercase](https://github.com/yt-dlp/yt-dlp/commit/546b2c28a106cf8101d481b215b676d1b091d276) by [coletdjnz](https://github.com/coletdjnz)
    - [Fix consent cookie](https://github.com/yt-dlp/yt-dlp/commit/378ae9f9fb8e8c86e6ac89c4c5b815b48ce93620) ([#7774](https://github.com/yt-dlp/yt-dlp/issues/7774)) by [coletdjnz](https://github.com/coletdjnz)
    - tab: [Detect looping feeds](https://github.com/yt-dlp/yt-dlp/commit/1ba6fe9db5f660d5538588315c23ad6cf0371c5f) ([#6621](https://github.com/yt-dlp/yt-dlp/issues/6621)) by [coletdjnz](https://github.com/coletdjnz)
- **zaiko**: [Improve thumbnail extraction](https://github.com/yt-dlp/yt-dlp/commit/ecef42c3adbcb6a84405139047923c4967316f28) ([#8054](https://github.com/yt-dlp/yt-dlp/issues/8054)) by [pzhlkj6612](https://github.com/pzhlkj6612)
- **zee5**: [Update access token endpoint](https://github.com/yt-dlp/yt-dlp/commit/a0de8bb8601146b8f87bf7cd562eef8bfb4690be) ([#7914](https://github.com/yt-dlp/yt-dlp/issues/7914)) by [bashonly](https://github.com/bashonly)
- **zoom**: [Extract duration](https://github.com/yt-dlp/yt-dlp/commit/66cc64ff6696f9921ff112a278542f8d999ffea4) by [bashonly](https://github.com/bashonly)
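The youtube `player_params` entries above expose a new extractor-arg. A minimal usage sketch through the embedding API; the `player_params` value and the URL are placeholders, not known-good inputs, and the CLI equivalent is `--extractor-args "youtube:player_params=..."`:

```python
import yt_dlp

# extractor_args maps extractor name -> {arg name: list of values}.
# 'PLACEHOLDER' is illustrative only; see the linked commits for semantics.
opts = {
    'extractor_args': {
        'youtube': {'player_params': ['PLACEHOLDER']},
    },
}
with yt_dlp.YoutubeDL(opts) as ydl:
    # Placeholder video URL
    info = ydl.extract_info('https://www.youtube.com/watch?v=XXXXXXXXXXX', download=False)
    print(info.get('title'))
```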
#### Downloader changes
- **external**
    - [Fix ffmpeg input from stdin](https://github.com/yt-dlp/yt-dlp/commit/e57eb98222d29cc4c09ee975d3c492274a6e5be3) ([#7655](https://github.com/yt-dlp/yt-dlp/issues/7655)) by [bashonly](https://github.com/bashonly)
    - [Fixes to cookie handling](https://github.com/yt-dlp/yt-dlp/commit/42ded0a429c20ec13dc006825e1508d9a02f0ad4) by [bashonly](https://github.com/bashonly)

#### Postprocessor changes
- **embedthumbnail**: [Support `m4v`](https://github.com/yt-dlp/yt-dlp/commit/8a4cd12c8f8e93292e3e95200b9d17a3af39624c) ([#7583](https://github.com/yt-dlp/yt-dlp/issues/7583)) by [Neurognostic](https://github.com/Neurognostic)

#### Networking changes
- [Add module](https://github.com/yt-dlp/yt-dlp/commit/c365dba8430ee33abda85d31f95128605bf240eb) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [pukkandan](https://github.com/pukkandan)
- [Add request handler preference framework](https://github.com/yt-dlp/yt-dlp/commit/db7b054a6111ca387220d0eb87bf342f9c130eb8) ([#7603](https://github.com/yt-dlp/yt-dlp/issues/7603)) by [coletdjnz](https://github.com/coletdjnz)
- [Add strict Request extension checking](https://github.com/yt-dlp/yt-dlp/commit/86aea0d3a213da3be1da638b9b828e6f0ee1d59f) ([#7604](https://github.com/yt-dlp/yt-dlp/issues/7604)) by [coletdjnz](https://github.com/coletdjnz)
- [Fix POST requests with zero-length payloads](https://github.com/yt-dlp/yt-dlp/commit/71baa490ebd3655746430f208a9b605d120cd315) ([#7648](https://github.com/yt-dlp/yt-dlp/issues/7648)) by [bashonly](https://github.com/bashonly)
- [Fix `--legacy-server-connect`](https://github.com/yt-dlp/yt-dlp/commit/75dc8e673b481a82d0688aeec30f6c65d82bb359) ([#7645](https://github.com/yt-dlp/yt-dlp/issues/7645)) by [bashonly](https://github.com/bashonly)
- [Fix various socks proxy bugs](https://github.com/yt-dlp/yt-dlp/commit/20fbbd9249a2f26c7ae579bde5ba5d69aa8fac69) ([#8065](https://github.com/yt-dlp/yt-dlp/issues/8065)) by [coletdjnz](https://github.com/coletdjnz) (see the sketch after this list)
- [Ignore invalid proxies in env](https://github.com/yt-dlp/yt-dlp/commit/bbeacff7fcaa3b521066088a5ccbf34ef5070d1d) ([#7704](https://github.com/yt-dlp/yt-dlp/issues/7704)) by [coletdjnz](https://github.com/coletdjnz)
- [Rewrite architecture](https://github.com/yt-dlp/yt-dlp/commit/227bf1a33be7b89cd7d44ad046844c4ccba104f4) ([#2861](https://github.com/yt-dlp/yt-dlp/issues/2861)) by [coletdjnz](https://github.com/coletdjnz)
- **Request Handler**
    - urllib
        - [Remove dot segments during URL normalization](https://github.com/yt-dlp/yt-dlp/commit/4bf912282a34b58b6b35d8f7e6be535770c89c76) ([#7662](https://github.com/yt-dlp/yt-dlp/issues/7662)) by [coletdjnz](https://github.com/coletdjnz)
        - [Simplify gzip decoding](https://github.com/yt-dlp/yt-dlp/commit/59e92b1f1833440bb2190f847eb735cf0f90bc85) ([#7611](https://github.com/yt-dlp/yt-dlp/issues/7611)) by [Grub4K](https://github.com/Grub4K) (With fixes in [77bff23](https://github.com/yt-dlp/yt-dlp/commit/77bff23ee97565bab2e0d75b893a21bf7983219a))
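Several of the networking entries above concern proxy handling (socks fixes, invalid proxies in the environment). For reference, a minimal sketch of supplying a proxy through the embedding API; the proxy address and URL are placeholders, and the CLI equivalent is `--proxy`:

```python
import yt_dlp

# The 'proxy' option accepts http(s) and socks URLs.
opts = {'proxy': 'socks5://127.0.0.1:1080'}  # placeholder address
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://example.com/some-video'])  # placeholder URL
```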
#### Misc. changes
- **build**: [Make sure deprecated modules are added](https://github.com/yt-dlp/yt-dlp/commit/131d132da5c98c6c78bd7eed4b37f4458561b3d9) by [pukkandan](https://github.com/pukkandan)
- **cleanup**
    - [Add color to `download-archive` message](https://github.com/yt-dlp/yt-dlp/commit/2b029ca0a9f9105c4f7626993fa60e54c9782749) ([#5138](https://github.com/yt-dlp/yt-dlp/issues/5138)) by [aaruni96](https://github.com/aaruni96), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
    - Miscellaneous
        - [6148833](https://github.com/yt-dlp/yt-dlp/commit/6148833f5ceb7674142ddb8d761ffe03cee7df69), [62b5c94](https://github.com/yt-dlp/yt-dlp/commit/62b5c94cadaa5f596dc1a7083db9db12efe357be) by [pukkandan](https://github.com/pukkandan)
        - [5ca095c](https://github.com/yt-dlp/yt-dlp/commit/5ca095cbcde3e32642a4fe5b2d69e8e3c785a021) by [barsnick](https://github.com/barsnick), [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [gamer191](https://github.com/gamer191), [Grub4K](https://github.com/Grub4K), [sqrtNOT](https://github.com/sqrtNOT)
        - [088add9](https://github.com/yt-dlp/yt-dlp/commit/088add9567d39b758737e4299a0e619fd89d2e8f) by [Grub4K](https://github.com/Grub4K)
- **devscripts**: `make_changelog`: [Fix changelog grouping and add networking group](https://github.com/yt-dlp/yt-dlp/commit/30ba233d4cee945756ed7344e7ddb3a90d2ae608) ([#8124](https://github.com/yt-dlp/yt-dlp/issues/8124)) by [Grub4K](https://github.com/Grub4K)
- **docs**: [Update collaborators](https://github.com/yt-dlp/yt-dlp/commit/1be0a96a4d14f629097509fcc89d15f69a8243c7) by [Grub4K](https://github.com/Grub4K)
- **test**
    - [Add tests for socks proxies](https://github.com/yt-dlp/yt-dlp/commit/fcd6a76adc49d5cd8783985c7ce35384b72e545f) ([#7908](https://github.com/yt-dlp/yt-dlp/issues/7908)) by [coletdjnz](https://github.com/coletdjnz)
    - [Fix `httplib_validation_errors` test for old Python versions](https://github.com/yt-dlp/yt-dlp/commit/95abea9a03289da1384e5bda3d590223ccc0a238) ([#7677](https://github.com/yt-dlp/yt-dlp/issues/7677)) by [coletdjnz](https://github.com/coletdjnz)
    - [Fix `test_load_certifi`](https://github.com/yt-dlp/yt-dlp/commit/de20687ee6b742646128a7629b57096631a20619) by [pukkandan](https://github.com/pukkandan)
    - download: [Test for `expected_exception`](https://github.com/yt-dlp/yt-dlp/commit/661c9a1d029296b28e0b2f8be8a72a43abaf6536) by [at-wat](https://github.com/at-wat)

### 2023.07.06

#### Important changes
- Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)
    - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains
    - Cookies are scoped when passed to external downloaders
    - Add `cookies` field to info.json and deprecate `http_headers.Cookie` (see the sketch below)
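The last sub-point above changes what embedders should read from an info.json: a top-level `cookies` field now supersedes the deprecated `http_headers.Cookie`. A defensive reading pattern might look like this (a sketch; the filename is a placeholder):

```python
import json

# Prefer the new top-level 'cookies' field written by newer versions,
# falling back to the deprecated http_headers Cookie for older files.
with open('video.info.json', encoding='utf-8') as f:
    info = json.load(f)

cookie_header = info.get('cookies') or info.get('http_headers', {}).get('Cookie')
print(cookie_header)
```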
#### Core changes
- [Allow extractors to mark formats as potentially DRM](https://github.com/yt-dlp/yt-dlp/commit/bc344cd456380999c1ee74554dfd432a38f32ec7) ([#7396](https://github.com/yt-dlp/yt-dlp/issues/7396)) by [pukkandan](https://github.com/pukkandan)
- [Bugfix for b4e0d75848e9447cee2cd3646ce54d4744a7ff56](https://github.com/yt-dlp/yt-dlp/commit/e59e20744eb32ce4b6ea0dece7c673be8376a710) by [pukkandan](https://github.com/pukkandan)
- [Change how `Cookie` headers are handled](https://github.com/yt-dlp/yt-dlp/commit/3121512228487c9c690d3d39bfd2579addf96e07) by [Grub4K](https://github.com/Grub4K)
- [Prevent `Cookie` leaks on HTTP redirect](https://github.com/yt-dlp/yt-dlp/commit/f8b4bcc0a791274223723488bfbfc23ea3276641) by [coletdjnz](https://github.com/coletdjnz)
- **formats**: [Fix best fallback for storyboards](https://github.com/yt-dlp/yt-dlp/commit/906c0bdcd8974340d619e99ccd613c163eb0d0c2) by [pukkandan](https://github.com/pukkandan)
- **outtmpl**: [Pad `playlist_index` etc even when with internal formatting](https://github.com/yt-dlp/yt-dlp/commit/47bcd437247152e0af5b3ebc5592db7bb66855c2) by [pukkandan](https://github.com/pukkandan)
- **utils**: clean_podcast_url: [Handle protocol in redirect URL](https://github.com/yt-dlp/yt-dlp/commit/91302ed349f34dc26cc1d661bb45a4b71f4417f7) by [pukkandan](https://github.com/pukkandan)

#### Extractor changes
- **abc**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/8f05fbae2a79ce0713077ccc68b354e63216bf20) ([#7434](https://github.com/yt-dlp/yt-dlp/issues/7434)) by [meliber](https://github.com/meliber)
- **AdultSwim**: [Extract subtitles from m3u8](https://github.com/yt-dlp/yt-dlp/commit/5e16cf92eb496b7c1541a6b1d727cb87542984db) ([#7421](https://github.com/yt-dlp/yt-dlp/issues/7421)) by [nnoboa](https://github.com/nnoboa)
- **crunchyroll**: music: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/5b4b92769afcc398475e481bfa839f1158902fe9) ([#7439](https://github.com/yt-dlp/yt-dlp/issues/7439)) by [AmanSal1](https://github.com/AmanSal1), [rdamas](https://github.com/rdamas)
- **Douyin**: [Fix extraction from webpage](https://github.com/yt-dlp/yt-dlp/commit/a2be9781fbf4d7e4db245c277ca2ecc41cf3a7b2) by [bashonly](https://github.com/bashonly)
- **googledrive**: [Fix source format extraction](https://github.com/yt-dlp/yt-dlp/commit/3b7f5300c577fef40464d46d4e4037a69d51fe82) ([#7395](https://github.com/yt-dlp/yt-dlp/issues/7395)) by [RfadnjdExt](https://github.com/RfadnjdExt)
- **kick**: [Fix `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/ef8509c300ea50da86aea447eb214d3d6f6db6bb) by [bashonly](https://github.com/bashonly)
- **qdance**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f0a1ff118145b6449982ba401f9a9f656ecd8062) ([#7420](https://github.com/yt-dlp/yt-dlp/issues/7420)) by [bashonly](https://github.com/bashonly)
- **sbs**: [Python 3.7 compat](https://github.com/yt-dlp/yt-dlp/commit/f393bbe724b1fc6c7f754a5da507e807b2b40ad2) by [pukkandan](https://github.com/pukkandan)
- **stacommu**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/af1fd12f675220df6793fc019dff320bc76e8080) ([#7432](https://github.com/yt-dlp/yt-dlp/issues/7432)) by [urectanc](https://github.com/urectanc)
- **twitter**
    - [Fix unauthenticated extraction](https://github.com/yt-dlp/yt-dlp/commit/49296437a8e5fa91dacb5446e51ab588474c85d3) ([#7476](https://github.com/yt-dlp/yt-dlp/issues/7476)) by [bashonly](https://github.com/bashonly)
    - spaces: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/1cffd621cb371f1563563cfb2fe37d137e8a7bee) ([#7512](https://github.com/yt-dlp/yt-dlp/issues/7512)) by [bashonly](https://github.com/bashonly)
- **vidlii**: [Handle relative URLs](https://github.com/yt-dlp/yt-dlp/commit/ad8902f616ad2541f9b9626738f1393fad89a64c) by [pukkandan](https://github.com/pukkandan)
- **vk**: VKPlay, VKPlayLive: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/8776349ef6b1f644584a92dfa00a05208a48edc4) ([#7358](https://github.com/yt-dlp/yt-dlp/issues/7358)) by [c-basalt](https://github.com/c-basalt)
- **youtube**
    - [Add extractor-arg `formats`](https://github.com/yt-dlp/yt-dlp/commit/58786a10f212bd63f9ad1d0b4d9e4d31c3b385e2) by [pukkandan](https://github.com/pukkandan)
    - [Avoid false DRM detection](https://github.com/yt-dlp/yt-dlp/commit/94ed638a437fc766699d440e978982e24ce6a30a) ([#7396](https://github.com/yt-dlp/yt-dlp/issues/7396)) by [pukkandan](https://github.com/pukkandan)
    - [Fix comments' `is_favorited`](https://github.com/yt-dlp/yt-dlp/commit/89bed013741a776506f60380b7fd89d27d0710b4) ([#7390](https://github.com/yt-dlp/yt-dlp/issues/7390)) by [bbilly1](https://github.com/bbilly1)
    - [Ignore incomplete data for comment threads by default](https://github.com/yt-dlp/yt-dlp/commit/4dc4d8473c085900edc841c87c20041233d25b1f) ([#7475](https://github.com/yt-dlp/yt-dlp/issues/7475)) by [coletdjnz](https://github.com/coletdjnz)
    - [Process `post_live` over 2 hours](https://github.com/yt-dlp/yt-dlp/commit/d949c10c45bfc359bdacd52e6a180169b8128958) by [pukkandan](https://github.com/pukkandan)
    - stories: [Remove](https://github.com/yt-dlp/yt-dlp/commit/90db9a3c00ca80492c6a58c542e4cbf4c2710866) ([#7459](https://github.com/yt-dlp/yt-dlp/issues/7459)) by [pukkandan](https://github.com/pukkandan)
    - tab: [Support shorts-only playlists](https://github.com/yt-dlp/yt-dlp/commit/fcbc9ed760be6e3455bbadfaf277b4504b06f068) ([#7425](https://github.com/yt-dlp/yt-dlp/issues/7425)) by [coletdjnz](https://github.com/coletdjnz)

#### Downloader changes
- **aria2c**: [Add `--no-conf`](https://github.com/yt-dlp/yt-dlp/commit/8a8af356e3bba98a7f7d333aff0777d5d92130c8) by [pukkandan](https://github.com/pukkandan)
- **external**: [Scope cookies](https://github.com/yt-dlp/yt-dlp/commit/1ceb657bdd254ad961489e5060f2ccc7d556b729) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz)
- **http**: [Avoid infinite loop when no data is received](https://github.com/yt-dlp/yt-dlp/commit/662ef1e910b72e57957f06589925b2332ba52821) by [pukkandan](https://github.com/pukkandan)
#### Misc. changes
- [Add CodeQL workflow](https://github.com/yt-dlp/yt-dlp/commit/6355b5f1e1e8e7f4ef866d71d51e03baf0e82f17) ([#7497](https://github.com/yt-dlp/yt-dlp/issues/7497)) by [jorgectf](https://github.com/jorgectf)
- **cleanup**: Miscellaneous: [337734d](https://github.com/yt-dlp/yt-dlp/commit/337734d4a8a6500bc65434843db346b5cbd05e81) by [pukkandan](https://github.com/pukkandan)
- **docs**: [Minor fixes](https://github.com/yt-dlp/yt-dlp/commit/b532a3481046e1eabb6232ee8196fb696c356ff6) by [pukkandan](https://github.com/pukkandan)
- **make_changelog**: [Skip reverted commits](https://github.com/yt-dlp/yt-dlp/commit/fa44802809d189fca0f4782263d48d6533384503) by [pukkandan](https://github.com/pukkandan)

### 2023.06.22

#### Core changes
- [Fix bug in db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb](https://github.com/yt-dlp/yt-dlp/commit/d7cd97e8d8d42b500fea9abb2aa4ac9b0f98b2ad) by [pukkandan](https://github.com/pukkandan)
- [Improve `--download-sections`](https://github.com/yt-dlp/yt-dlp/commit/b4e0d75848e9447cee2cd3646ce54d4744a7ff56) by [pukkandan](https://github.com/pukkandan) (see the sketch after this list)
    - Support negative time-ranges
    - Add `*from-url` to obey time-ranges in URL
- [Indicate `filesize` approximated from `tbr` better](https://github.com/yt-dlp/yt-dlp/commit/0dff8e4d1e6e9fb938f4256ea9af7d81f42fd54f) by [pukkandan](https://github.com/pukkandan)
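For embedders, `--download-sections` corresponds to the `download_ranges` option together with the documented `download_range_func` helper. A minimal sketch (the URL is a placeholder; per the entry above, negative timestamps are interpreted relative to the end of the video):

```python
import yt_dlp
from yt_dlp.utils import download_range_func

# Download only the 10.5s-20s section of each video; roughly equivalent
# to the CLI invocation: --download-sections "*10.5-20"
opts = {
    'download_ranges': download_range_func(None, [(10.5, 20.0)]),
    'force_keyframes_at_cuts': True,  # re-encode so cuts land on keyframes
}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://example.com/some-video'])  # placeholder URL
```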
#### Extractor changes
- [Support multiple `_VALID_URL`s](https://github.com/yt-dlp/yt-dlp/commit/5fd8367496b42c7b900b896a0d5460561a2859de) ([#5812](https://github.com/yt-dlp/yt-dlp/issues/5812)) by [nixxo](https://github.com/nixxo)
- **dplay**: GlobalCyclingNetworkPlus: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/774aa09dd6aa61ced9ec818d1f67e53414d22762) ([#7360](https://github.com/yt-dlp/yt-dlp/issues/7360)) by [bashonly](https://github.com/bashonly)
- **dropout**: [Fix season extraction](https://github.com/yt-dlp/yt-dlp/commit/db22142f6f817ff673d417b4b78e8db497bf8ab3) ([#7304](https://github.com/yt-dlp/yt-dlp/issues/7304)) by [OverlordQ](https://github.com/OverlordQ)
- **motherless**: [Add gallery support, fix groups](https://github.com/yt-dlp/yt-dlp/commit/f2ff0f6f1914b82d4a51681a72cc0828115dcb4a) ([#7211](https://github.com/yt-dlp/yt-dlp/issues/7211)) by [rexlambert22](https://github.com/rexlambert22), [Ti4eeT4e](https://github.com/Ti4eeT4e)
- **nebula**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/3f756c8c4095b942cf49788eb0862ceaf57847f2) ([#7156](https://github.com/yt-dlp/yt-dlp/issues/7156)) by [Lamieur](https://github.com/Lamieur), [rohieb](https://github.com/rohieb)
- **rheinmaintv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/98cb1eda7a4cf67c96078980dbd63e6c06ad7f7c) ([#7311](https://github.com/yt-dlp/yt-dlp/issues/7311)) by [barthelmannk](https://github.com/barthelmannk)
- **youtube**
    - [Add `ios` to default clients used](https://github.com/yt-dlp/yt-dlp/commit/1e75d97db21152acc764b30a688e516f04b8a142) by [pukkandan](https://github.com/pukkandan)
        - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively
        - IOS also has higher bit-rate 'premium' formats though they are not labeled as such (see the sketch after this list)
    - [Improve description parsing performance](https://github.com/yt-dlp/yt-dlp/commit/71dc18fa29263a1ff0472c23d81bfc8dd4422d48) ([#7315](https://github.com/yt-dlp/yt-dlp/issues/7315)) by [berkanteber](https://github.com/berkanteber), [pukkandan](https://github.com/pukkandan)
    - [Improve nsig function name extraction](https://github.com/yt-dlp/yt-dlp/commit/cd810afe2ac5567c822b7424800fc470ef2d0045) by [pukkandan](https://github.com/pukkandan)
    - [Workaround 403 for android formats](https://github.com/yt-dlp/yt-dlp/commit/81ca451480051d7ce1a31c017e005358345a9149) by [pukkandan](https://github.com/pukkandan)
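The note above about unlabeled higher-bitrate 'premium' formats is one case where explicit format sorting helps: preferring bitrate lets such formats win even without a label. A sketch using the documented sort-order option (CLI: `-S`); the URL is a placeholder:

```python
import yt_dlp

# Prefer higher total bitrate first, then resolution; roughly -S "tbr,res"
opts = {'format_sort': ['tbr', 'res']}
with yt_dlp.YoutubeDL(opts) as ydl:
    info = ydl.extract_info('https://www.youtube.com/watch?v=XXXXXXXXXXX', download=False)
    print(info.get('format_id'), info.get('tbr'))
```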
#### Misc. changes
- [Revert "Add automatic duplicate issue detection"](https://github.com/yt-dlp/yt-dlp/commit/a4486bfc1dc7057efca9dd3fe70d7fa25c56f700) by [pukkandan](https://github.com/pukkandan)
- **cleanup**
    - Miscellaneous
        - [7f9c6a6](https://github.com/yt-dlp/yt-dlp/commit/7f9c6a63b16e145495479e9f666f5b9e2ee69e2f) by [bashonly](https://github.com/bashonly)
        - [812cdfa](https://github.com/yt-dlp/yt-dlp/commit/812cdfa06c33a40e73a8e04b3e6f42c084666a43) by [pukkandan](https://github.com/pukkandan)

### 2023.06.21

#### Important changes
- YouTube: Improved throttling and signature fixes

#### Core changes
- [Add `--compat-option playlist-match-filter`](https://github.com/yt-dlp/yt-dlp/commit/93b39cdbd9dcf351bfa0c4ee252805b4617fdca9) by [pukkandan](https://github.com/pukkandan)
- [Add `--no-quiet`](https://github.com/yt-dlp/yt-dlp/commit/d669772c65e8630162fd6555d0a578b246591921) by [pukkandan](https://github.com/pukkandan)
- [Add option `--color`](https://github.com/yt-dlp/yt-dlp/commit/8417f26b8a819cd7ffcd4e000ca3e45033e670fb) ([#6904](https://github.com/yt-dlp/yt-dlp/issues/6904)) by [Grub4K](https://github.com/Grub4K)
- [Add option `--netrc-cmd`](https://github.com/yt-dlp/yt-dlp/commit/db3ad8a67661d7b234a6954d9c6a4a9b1749f5eb) ([#6682](https://github.com/yt-dlp/yt-dlp/issues/6682)) by [NDagestad](https://github.com/NDagestad), [pukkandan](https://github.com/pukkandan)
- [Add option `--xff`](https://github.com/yt-dlp/yt-dlp/commit/c16644642b08e2bf4130a6c5fa01395d8718c990) by [pukkandan](https://github.com/pukkandan)
- [Auto-select default format in `-f-`](https://github.com/yt-dlp/yt-dlp/commit/372a0f3b9dadd1e52234b498aa4c7040ef868c7d) ([#7101](https://github.com/yt-dlp/yt-dlp/issues/7101)) by [ivanskodje](https://github.com/ivanskodje), [pukkandan](https://github.com/pukkandan)
- [Deprecate internal `Youtubedl-no-compression` header](https://github.com/yt-dlp/yt-dlp/commit/955c89584b66fcd0fcfab3e611f1edeb1ca63886) ([#6876](https://github.com/yt-dlp/yt-dlp/issues/6876)) by [coletdjnz](https://github.com/coletdjnz)
- [Do not translate newlines in `--print-to-file`](https://github.com/yt-dlp/yt-dlp/commit/9874e82b5a61582169300bea561b3e8899ad1ef7) by [pukkandan](https://github.com/pukkandan)
- [Ensure pre-processor errors do not block `--print`](https://github.com/yt-dlp/yt-dlp/commit/f005a35aa7e4f67a0c603a946c0dd714c151b2d6) by [pukkandan](https://github.com/pukkandan) (With fixes in [17ba434](https://github.com/yt-dlp/yt-dlp/commit/17ba4343cf99701692a7f4798fd42b50f644faba))
- [Fix `filepath` being copied to underlying format dict](https://github.com/yt-dlp/yt-dlp/commit/84078a8b38f403495d00b46654c8750774d821de) by [pukkandan](https://github.com/pukkandan)
- [Improve HTTP redirect handling](https://github.com/yt-dlp/yt-dlp/commit/08916a49c777cb6e000eec092881eb93ec22076c) ([#7094](https://github.com/yt-dlp/yt-dlp/issues/7094)) by [coletdjnz](https://github.com/coletdjnz)
- [Populate `filename` and `urls` fields at all stages of `--print`](https://github.com/yt-dlp/yt-dlp/commit/170605840ea9d5ad75da6576485ea7d125b428ee) by [pukkandan](https://github.com/pukkandan) (With fixes in [b5f61b6](https://github.com/yt-dlp/yt-dlp/commit/b5f61b69d4561b81fc98c226b176f0c15493e688))
- [Relaxed validation for numeric format filters](https://github.com/yt-dlp/yt-dlp/commit/c3f624ef0a5d7a6ae1c5ffeb243087e9fc7d79dc) by [pukkandan](https://github.com/pukkandan)
- [Support decoding multiple content encodings](https://github.com/yt-dlp/yt-dlp/commit/daafbf49b3482edae4d70dd37070be99742a926e) ([#7142](https://github.com/yt-dlp/yt-dlp/issues/7142)) by [coletdjnz](https://github.com/coletdjnz)
- [Support loading info.json with a list at its root](https://github.com/yt-dlp/yt-dlp/commit/ab1de9cb1e39cf421c2b7dc6756c6ff1955bb313) by [pukkandan](https://github.com/pukkandan)
- [Workaround erroneous urllib Windows proxy parsing](https://github.com/yt-dlp/yt-dlp/commit/3f66b6fe50f8d5b545712f8b19d5ae62f5373980) ([#7092](https://github.com/yt-dlp/yt-dlp/issues/7092)) by [coletdjnz](https://github.com/coletdjnz)
- **cookies**
    - [Defer extraction of v11 key from keyring](https://github.com/yt-dlp/yt-dlp/commit/9b7a48abd1b187eae1e3f6c9839c47d43ccec00b) by [Grub4K](https://github.com/Grub4K)
    - [Move `YoutubeDLCookieJar` to cookies module](https://github.com/yt-dlp/yt-dlp/commit/b87e01c123fd560b6a674ce00f45a9459d82d98a) ([#7091](https://github.com/yt-dlp/yt-dlp/issues/7091)) by [coletdjnz](https://github.com/coletdjnz)
    - [Support custom Safari cookies path](https://github.com/yt-dlp/yt-dlp/commit/a58182b75a05fe0a10c5e94a536711d3ade19c20) ([#6783](https://github.com/yt-dlp/yt-dlp/issues/6783)) by [NextFire](https://github.com/NextFire)
    - [Update for chromium changes](https://github.com/yt-dlp/yt-dlp/commit/b38d4c941d1993ab27e4c0f8e024e23c2ec0f8f8) ([#6897](https://github.com/yt-dlp/yt-dlp/issues/6897)) by [mbway](https://github.com/mbway)
- **Cryptodome**: [Fix `__bool__`](https://github.com/yt-dlp/yt-dlp/commit/98ac902c4979e4529b166e873473bef42baa2e3e) by [pukkandan](https://github.com/pukkandan)
- **jsinterp**
    - [Do not compile regex](https://github.com/yt-dlp/yt-dlp/commit/7aeda6cc9e73ada0b0a0b6a6748c66bef63a20a8) by [pukkandan](https://github.com/pukkandan)
    - [Fix division](https://github.com/yt-dlp/yt-dlp/commit/b4a252fba81f53631c07ca40ce7583f5d19a8a36) ([#7279](https://github.com/yt-dlp/yt-dlp/issues/7279)) by [bashonly](https://github.com/bashonly)
    - [Fix global object extraction](https://github.com/yt-dlp/yt-dlp/commit/01aba2519a0884ef17d5f85608dbd2a455577147) by [pukkandan](https://github.com/pukkandan)
    - [Handle `NaN` in bitwise operators](https://github.com/yt-dlp/yt-dlp/commit/1d7656184c6b8aa46b29149893894b3c24f1df00) by [pukkandan](https://github.com/pukkandan)
    - [Handle negative numbers better](https://github.com/yt-dlp/yt-dlp/commit/7cf51f21916292cd80bdeceb37489f5322f166dd) by [pukkandan](https://github.com/pukkandan)
- **outtmpl**
    - [Allow `\n` in replacements and default.](https://github.com/yt-dlp/yt-dlp/commit/78fde6e3398ff11e5d383a66b28664badeab5180) by [pukkandan](https://github.com/pukkandan)
    - [Fix some minor bugs](https://github.com/yt-dlp/yt-dlp/commit/ebe1b4e34f43c3acad30e4bcb8484681a030c114) by [pukkandan](https://github.com/pukkandan) (With fixes in [1619ab3](https://github.com/yt-dlp/yt-dlp/commit/1619ab3e67d8dc4f86fc7ed292c79345bc0d91a0))
    - [Support `str.format` syntax inside replacements](https://github.com/yt-dlp/yt-dlp/commit/ec9311c41b111110bc52cfbd6ea682c6fb23f77a) by [pukkandan](https://github.com/pukkandan)
- **update**
    - [Better error handling](https://github.com/yt-dlp/yt-dlp/commit/d2e84d5eb01c66fc5304e8566348d65a7be24ed7) by [pukkandan](https://github.com/pukkandan)
    - [Do not restart into versions without `--update-to`](https://github.com/yt-dlp/yt-dlp/commit/02948a17d903f544363bb20b51a6d8baed7bba08) by [pukkandan](https://github.com/pukkandan)
    - [Implement `--update-to` repo](https://github.com/yt-dlp/yt-dlp/commit/665472a7de3880578c0b7b3f95c71570c056368e) by [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
- **upstream**
    - [Merged with youtube-dl 07af47](https://github.com/yt-dlp/yt-dlp/commit/42f2d40b475db66486a4b4fe5b56751a640db5db) by [pukkandan](https://github.com/pukkandan)
    - [Merged with youtube-dl d1c6c5](https://github.com/yt-dlp/yt-dlp/commit/4823ec9f461512daa1b8ab362893bb86a6320b26) by [pukkandan](https://github.com/pukkandan) (With fixes in [edbe5b5](https://github.com/yt-dlp/yt-dlp/commit/edbe5b589dd0860a67b4e03f58db3cd2539d91c2) by [bashonly](https://github.com/bashonly))
- **utils**
    - `FormatSorter`: [Improve `size` and `br`](https://github.com/yt-dlp/yt-dlp/commit/eedda5252c05327748dede204a8fccafa0288118) by [pukkandan](https://github.com/pukkandan), [u-spec-png](https://github.com/u-spec-png)
    - `js_to_json`: [Implement template strings](https://github.com/yt-dlp/yt-dlp/commit/0898c5c8ccadfc404472456a7a7751b72afebadd) ([#6623](https://github.com/yt-dlp/yt-dlp/issues/6623)) by [Grub4K](https://github.com/Grub4K)
    - `locked_file`: [Fix for virtiofs](https://github.com/yt-dlp/yt-dlp/commit/45998b3e371b819ce0dbe50da703809a048cc2fe) ([#6840](https://github.com/yt-dlp/yt-dlp/issues/6840)) by [brandon-dacrib](https://github.com/brandon-dacrib)
    - `strftime_or_none`: [Handle negative timestamps](https://github.com/yt-dlp/yt-dlp/commit/a35af4306d24c56c6358f89cdf204860d1cd62b4) by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
    - `traverse_obj` (see the sketch after this list)
        - [Allow iterables in traversal](https://github.com/yt-dlp/yt-dlp/commit/21b5ec86c2c37d10c5bb97edd7051d3aac16bb3e) ([#6902](https://github.com/yt-dlp/yt-dlp/issues/6902)) by [Grub4K](https://github.com/Grub4K)
    - [More fixes](https://github.com/yt-dlp/yt-dlp/commit/b079c26f0af8085bccdadc72c61c8164ca5ab0f8) ([#6959](https://github.com/yt-dlp/yt-dlp/issues/6959)) by [Grub4K](https://github.com/Grub4K)
    - `write_string`: [Fix noconsole behavior](https://github.com/yt-dlp/yt-dlp/commit/3b479100df02e20dd949e046003ae96ddbfced57) by [Grub4K](https://github.com/Grub4K)
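`traverse_obj` is one of the more broadly useful helpers touched above. A small sketch of the traversal style the fixes extend (branching with `...` collects values and skips missing ones; per the linked commits, traversal now also accepts arbitrary iterables):

```python
from yt_dlp.utils import traverse_obj

data = {'items': [{'snippet': {'title': 'first'}}, {'snippet': {}}]}

# Follow a path of keys/indices; missing steps yield None instead of raising
print(traverse_obj(data, ('items', 0, 'snippet', 'title')))  # 'first'
print(traverse_obj(data, ('items', 1, 'snippet', 'title')))  # None

# `...` branches over all elements and drops missing results
print(traverse_obj(data, ('items', ..., 'snippet', 'title')))  # ['first']
```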
#### Extractor changes
- [Do not exit early for unsuitable `url_result`](https://github.com/yt-dlp/yt-dlp/commit/baa922b5c74b10e3b86ff5e6cf6529b3aae8efab) by [pukkandan](https://github.com/pukkandan)
- [Do not warn for invalid chapter data in description](https://github.com/yt-dlp/yt-dlp/commit/84ffeb7d5e72e3829319ba7720a8480fc4c7503b) by [pukkandan](https://github.com/pukkandan)
- [Extract more metadata from ISM](https://github.com/yt-dlp/yt-dlp/commit/f68434cc74cfd3db01b266476a2eac8329fbb267) by [pukkandan](https://github.com/pukkandan)
- **abematv**: [Add fallback for title and description extraction and extract more metadata](https://github.com/yt-dlp/yt-dlp/commit/c449c0655d7c8549e6e1389c26b628053b253d39) ([#6994](https://github.com/yt-dlp/yt-dlp/issues/6994)) by [Lesmiscore](https://github.com/Lesmiscore)
- **acast**: [Support embeds](https://github.com/yt-dlp/yt-dlp/commit/c91ac833ea99b00506e470a44cf930e4e23378c9) ([#7212](https://github.com/yt-dlp/yt-dlp/issues/7212)) by [pabs3](https://github.com/pabs3)
- **adobepass**: [Handle `Charter_Direct` MSO as `Spectrum`](https://github.com/yt-dlp/yt-dlp/commit/ea0570820336a0fe9c3b530d1b0d1e59313274f4) ([#6824](https://github.com/yt-dlp/yt-dlp/issues/6824)) by [bashonly](https://github.com/bashonly)
- **aeonco**: [Support Youtube embeds](https://github.com/yt-dlp/yt-dlp/commit/ed81b74802b4247ee8d9dc0ef87eb52baefede1c) ([#6591](https://github.com/yt-dlp/yt-dlp/issues/6591)) by [alexklapheke](https://github.com/alexklapheke)
- **afreecatv**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/fdd69db38924c38194ef236b26325d66ac815c88) ([#6283](https://github.com/yt-dlp/yt-dlp/issues/6283)) by [blmarket](https://github.com/blmarket)
- **ARDBetaMediathek**: [Add thumbnail](https://github.com/yt-dlp/yt-dlp/commit/f78eb41e1c0f1dcdb10317358a26bf541dc7ee15) ([#6890](https://github.com/yt-dlp/yt-dlp/issues/6890)) by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
- **bibeltv**: [Fix extraction, support live streams and series](https://github.com/yt-dlp/yt-dlp/commit/4ad58667c102bd82a7c4cca8aa395ec1682e3b4c) ([#6505](https://github.com/yt-dlp/yt-dlp/issues/6505)) by [flashdagger](https://github.com/flashdagger)
- **bilibili**
    - [Support festival videos](https://github.com/yt-dlp/yt-dlp/commit/ab29e47029e2f5b48abbbab78e82faf7cf6e9506) ([#6547](https://github.com/yt-dlp/yt-dlp/issues/6547)) by [qbnu](https://github.com/qbnu)
    - SpaceVideo: [Extract signature](https://github.com/yt-dlp/yt-dlp/commit/6f10cdcf7eeaeae5b75e0a4428cd649c156a2d83) ([#7149](https://github.com/yt-dlp/yt-dlp/issues/7149)) by [elyse0](https://github.com/elyse0)
- **biliIntl**: [Add comment extraction](https://github.com/yt-dlp/yt-dlp/commit/b093c38cc9f26b59a8504211d792f053142c847d) ([#6079](https://github.com/yt-dlp/yt-dlp/issues/6079)) by [HobbyistDev](https://github.com/HobbyistDev)
- **bitchute**: [Add more fallback subdomains](https://github.com/yt-dlp/yt-dlp/commit/0c4e0fbcade0fc92d14c2a6d63e360fe067f6192) ([#6907](https://github.com/yt-dlp/yt-dlp/issues/6907)) by [Neurognostic](https://github.com/Neurognostic)
- **booyah**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/f7f7a877bf8e87fd4eb0ad2494ad948ca7691114) by [pukkandan](https://github.com/pukkandan)
- **BrainPOP**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/979568f26ece80bca72b48f0dd57d676e431059a) ([#6106](https://github.com/yt-dlp/yt-dlp/issues/6106)) by [MinePlayersPE](https://github.com/MinePlayersPE)
- **bravotv**
    - [Detect DRM](https://github.com/yt-dlp/yt-dlp/commit/1fe5bf240e6ade487d18079a62aa36bcc440a27a) ([#7171](https://github.com/yt-dlp/yt-dlp/issues/7171)) by [bashonly](https://github.com/bashonly)
    - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/06966cb8966b9aa4f60ab9c44c182a057d4ca3a3) ([#6568](https://github.com/yt-dlp/yt-dlp/issues/6568)) by [bashonly](https://github.com/bashonly)
- **camfm**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/4cbfa570a1b9bd65b0f48770693377e8d842dcb0) ([#7083](https://github.com/yt-dlp/yt-dlp/issues/7083)) by [garret1317](https://github.com/garret1317)
- **cbc**
    - [Fix live extractor, playlist `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/7a7b1376fbce0067cf37566bb47131bc0022638d) ([#6625](https://github.com/yt-dlp/yt-dlp/issues/6625)) by [makew0rld](https://github.com/makew0rld)
    - [Ignore 426 from API](https://github.com/yt-dlp/yt-dlp/commit/4afb208cf07b59291ae3b0c4efc83945ee5b8812) ([#6781](https://github.com/yt-dlp/yt-dlp/issues/6781)) by [jo-nike](https://github.com/jo-nike)
    - gem: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/871c907454693940cb56906ed9ea49fcb7154829) ([#6499](https://github.com/yt-dlp/yt-dlp/issues/6499)) by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
- **cbs**: [Add `ParamountPressExpress` extractor](https://github.com/yt-dlp/yt-dlp/commit/44369c9afa996e14e9f466754481d878811b5b4a) ([#6604](https://github.com/yt-dlp/yt-dlp/issues/6604)) by [bashonly](https://github.com/bashonly)
- **cbsnews**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/f6e43d6fa9804c24525e1fed0a87782754dab7ed) ([#6681](https://github.com/yt-dlp/yt-dlp/issues/6681)) by [bashonly](https://github.com/bashonly)
- **chilloutzone**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6f4fc5660f40f3458882a8f51601eae4af7be609) ([#6445](https://github.com/yt-dlp/yt-dlp/issues/6445)) by [bashonly](https://github.com/bashonly)
- **clipchamp**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2f07c4c1da4361af213e5791279b9d152d2e4ce3) ([#6978](https://github.com/yt-dlp/yt-dlp/issues/6978)) by [bashonly](https://github.com/bashonly)
- **comedycentral**: [Add support for movies](https://github.com/yt-dlp/yt-dlp/commit/66468bbf49562ff82670cbbd456c5e8448a6df34) ([#7108](https://github.com/yt-dlp/yt-dlp/issues/7108)) by [sqrtNOT](https://github.com/sqrtNOT)
- **crtvg**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/26c517b29c8727e47948d6fff749d5297f0efb60) ([#7168](https://github.com/yt-dlp/yt-dlp/issues/7168)) by [ItzMaxTV](https://github.com/ItzMaxTV)
- **crunchyroll**: [Rework with support for movies, music and artists](https://github.com/yt-dlp/yt-dlp/commit/032de83ea9ff2f4977d9c71a93bbc1775597b762) ([#6237](https://github.com/yt-dlp/yt-dlp/issues/6237)) by [Grub4K](https://github.com/Grub4K)
- **dacast**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/c25cac2f8e5fbac2737a426d7778fd2f0efc5381) ([#6896](https://github.com/yt-dlp/yt-dlp/issues/6896)) by [bashonly](https://github.com/bashonly)
- **daftsex**: [Update domain and embed player url](https://github.com/yt-dlp/yt-dlp/commit/fc5a7f9b27d2a89b1f3ca7d33a95301c21d832cd) ([#5966](https://github.com/yt-dlp/yt-dlp/issues/5966)) by [JChris246](https://github.com/JChris246)
- **DigitalConcertHall**: [Support films](https://github.com/yt-dlp/yt-dlp/commit/55ed4ff73487feb3177b037dfc2ea527e777da3e) ([#7202](https://github.com/yt-dlp/yt-dlp/issues/7202)) by [ItzMaxTV](https://github.com/ItzMaxTV)
- **discogs**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6daaf21092888beff11b807cd46f832f1f9c46a0) ([#6624](https://github.com/yt-dlp/yt-dlp/issues/6624)) by [rjy](https://github.com/rjy)
- **dlf**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b423b6a48e0b19260bc95ab7d72d2138d7f124dc) ([#6697](https://github.com/yt-dlp/yt-dlp/issues/6697)) by [nick-cd](https://github.com/nick-cd)
- **drtv**: [Fix radio page extraction](https://github.com/yt-dlp/yt-dlp/commit/9a06b7b1891b48cebbe275652ae8025a36d97d97) ([#6552](https://github.com/yt-dlp/yt-dlp/issues/6552)) by [viktor-enzell](https://github.com/viktor-enzell)
- **Dumpert**: [Fix m3u8 and support new URL pattern](https://github.com/yt-dlp/yt-dlp/commit/f8ae441501596733e2b967430471643a1d7cacb8) ([#6091](https://github.com/yt-dlp/yt-dlp/issues/6091)) by [DataGhost](https://github.com/DataGhost), [pukkandan](https://github.com/pukkandan)
- **elevensports**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/ecfe47973f6603b5367fe2cc3c65274627d94516) ([#7172](https://github.com/yt-dlp/yt-dlp/issues/7172)) by [ItzMaxTV](https://github.com/ItzMaxTV)
- **ettutv**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/83465fc4100a2fb2c188898fbc2f3021f6a9b4dd) ([#6579](https://github.com/yt-dlp/yt-dlp/issues/6579)) by [elyse0](https://github.com/elyse0)
- **europarl**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/03789976d301eaed3e957dbc041573098f6af059) ([#7114](https://github.com/yt-dlp/yt-dlp/issues/7114)) by [HobbyistDev](https://github.com/HobbyistDev)
- **eurosport**: [Improve `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/45e87ea106ad37b2a002663fa30ee41ce97b16cd) ([#7076](https://github.com/yt-dlp/yt-dlp/issues/7076)) by [HobbyistDev](https://github.com/HobbyistDev)
- **facebook**: [Fix metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/3b52a606881e6adadc33444abdeacce562b79330) ([#6856](https://github.com/yt-dlp/yt-dlp/issues/6856)) by [ringus1](https://github.com/ringus1)
- **foxnews**: [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/97d60ad8cd6c99f01e463a9acfce8693aff2a609) ([#7222](https://github.com/yt-dlp/yt-dlp/issues/7222)) by [bashonly](https://github.com/bashonly)
- **funker530**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/cab94a0cd8b6d3fffed5a6faff030274adbed182) ([#7291](https://github.com/yt-dlp/yt-dlp/issues/7291)) by [Cyberes](https://github.com/Cyberes)
- **generic**
    - [Accept values for `fragment_query`, `variant_query`](https://github.com/yt-dlp/yt-dlp/commit/5cc0a8fd2e9fec50026fb92170b57993af939e4a) ([#6600](https://github.com/yt-dlp/yt-dlp/issues/6600)) by [bashonly](https://github.com/bashonly) (With fixes in [9bfe0d1](https://github.com/yt-dlp/yt-dlp/commit/9bfe0d15bd7dbdc6b0e6378fa9f5e2e289b2373b))
    - [Add extractor-args `hls_key`, `variant_query`](https://github.com/yt-dlp/yt-dlp/commit/c2e0fc40a73dd85ab3920f977f579d475e66ef59) ([#6567](https://github.com/yt-dlp/yt-dlp/issues/6567)) by [bashonly](https://github.com/bashonly)
    - [Attempt to detect live HLS](https://github.com/yt-dlp/yt-dlp/commit/93e7c6995e07dafb9dcc06c0d06acf6c5bdfecc5) ([#6775](https://github.com/yt-dlp/yt-dlp/issues/6775)) by [bashonly](https://github.com/bashonly)
- **genius**: [Add support for articles](https://github.com/yt-dlp/yt-dlp/commit/460da07439718d9af1e3661da2a23e05a913a2e6) ([#6474](https://github.com/yt-dlp/yt-dlp/issues/6474)) by [bashonly](https://github.com/bashonly)
- **globalplayer**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/30647668a92a0ca5cd108776804baac0996bd9f7) ([#6903](https://github.com/yt-dlp/yt-dlp/issues/6903)) by [garret1317](https://github.com/garret1317)
- **gmanetwork**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2d97d154fe4fb84fe2ed3a4e1ed5819e89b71e88) ([#5945](https://github.com/yt-dlp/yt-dlp/issues/5945)) by [HobbyistDev](https://github.com/HobbyistDev)
- **gronkh**: [Extract duration and chapters](https://github.com/yt-dlp/yt-dlp/commit/9c92b803fa24e48543ce969468d5404376e315b7) ([#6817](https://github.com/yt-dlp/yt-dlp/issues/6817)) by [satan1st](https://github.com/satan1st)
- **hentaistigma**: [Remove extractor](https://github.com/yt-dlp/yt-dlp/commit/04f8018a0544736a18494bc3899d06b05b78fae6) by [pukkandan](https://github.com/pukkandan)
- **hidive**: [Fix login](https://github.com/yt-dlp/yt-dlp/commit/e6ab678e36c40ded0aae305bbb866cdab554d417) by [pukkandan](https://github.com/pukkandan)
- **hollywoodreporter**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/6bdb64e2a2a6d504d8ce1dc830fbfb8a7f199c63) ([#6614](https://github.com/yt-dlp/yt-dlp/issues/6614)) by [bashonly](https://github.com/bashonly)
- **hotstar**: [Support `/shows/` URLs](https://github.com/yt-dlp/yt-dlp/commit/7f8ddebbb51c9fd4a347306332a718ba41b371b8) ([#7225](https://github.com/yt-dlp/yt-dlp/issues/7225)) by [bashonly](https://github.com/bashonly)
- **hrefli**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/7e35526d5b970a034b9d76215ee3e4bd7631edcd) ([#6762](https://github.com/yt-dlp/yt-dlp/issues/6762)) by [selfisekai](https://github.com/selfisekai)
- **idolplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5c14b213679ed4401288bdc86ae696932e219222) ([#6732](https://github.com/yt-dlp/yt-dlp/issues/6732)) by [ping](https://github.com/ping)
- **iq**: [Set more language codes](https://github.com/yt-dlp/yt-dlp/commit/2d5cae9636714ff922d28c548c349d5f2b48f317) ([#6476](https://github.com/yt-dlp/yt-dlp/issues/6476)) by [D0LLYNH0](https://github.com/D0LLYNH0)
- **iwara**
    - [Accept old URLs](https://github.com/yt-dlp/yt-dlp/commit/ab92d8651c48d247dfb7d3f0a824cc986e47c7ed) by [Lesmiscore](https://github.com/Lesmiscore)
    - [Fix authentication](https://github.com/yt-dlp/yt-dlp/commit/0a5d7c39e17bb9bd50c9db42bcad40eb82d7f784) ([#7137](https://github.com/yt-dlp/yt-dlp/issues/7137)) by [toomyzoom](https://github.com/toomyzoom)
    - [Fix format sorting](https://github.com/yt-dlp/yt-dlp/commit/56793f74c36899742d7abd52afb0deca97d469e1) ([#6651](https://github.com/yt-dlp/yt-dlp/issues/6651)) by [hasezoey](https://github.com/hasezoey)
    - [Fix typo](https://github.com/yt-dlp/yt-dlp/commit/d1483ec693c79f0b4ddf493870bcb840aca4da08) by [Lesmiscore](https://github.com/Lesmiscore)
    - [Implement login](https://github.com/yt-dlp/yt-dlp/commit/21b9413cf7dd4830b2ece57af21589dd4538fc52) ([#6721](https://github.com/yt-dlp/yt-dlp/issues/6721)) by [toomyzoom](https://github.com/toomyzoom)
    - [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/c14af7a741931b364bab3d9546c0f4359f318f8c) ([#6557](https://github.com/yt-dlp/yt-dlp/issues/6557)) by [Lesmiscore](https://github.com/Lesmiscore)
    - [Report private videos](https://github.com/yt-dlp/yt-dlp/commit/95a383be1b6fb00c92ee3fb091732c4f6009acb6) ([#6641](https://github.com/yt-dlp/yt-dlp/issues/6641)) by [Lesmiscore](https://github.com/Lesmiscore)
- **JStream**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3459d3c5af3b2572ed51e8ecfda6c11022a838c6) ([#6252](https://github.com/yt-dlp/yt-dlp/issues/6252)) by [Lesmiscore](https://github.com/Lesmiscore)
- **jwplatform**: [Update `_extract_embed_urls`](https://github.com/yt-dlp/yt-dlp/commit/cf9fd52fabe71d6e7c30d3ea525029ffa561fc9c) ([#6383](https://github.com/yt-dlp/yt-dlp/issues/6383)) by [carusocr](https://github.com/carusocr)
- **kick**: [Make initial request non-fatal](https://github.com/yt-dlp/yt-dlp/commit/0a6918a4a1431960181d8c50e0bbbcb0afbaff9a) by [bashonly](https://github.com/bashonly)
- **LastFM**: [Rewrite playlist extraction](https://github.com/yt-dlp/yt-dlp/commit/026435714cb7c39613a0d7d2acd15d3823b78d94) ([#6379](https://github.com/yt-dlp/yt-dlp/issues/6379)) by [hatienl0i261299](https://github.com/hatienl0i261299), [pukkandan](https://github.com/pukkandan)
- **lbry**: [Extract original quality formats](https://github.com/yt-dlp/yt-dlp/commit/44c0d66442b568d9e1359e669d8b029b08a77fa7) ([#7257](https://github.com/yt-dlp/yt-dlp/issues/7257)) by [bashonly](https://github.com/bashonly)
- **line**: [Remove extractors](https://github.com/yt-dlp/yt-dlp/commit/faa0332ed69e070cf3bd31390589a596e962f392) ([#6734](https://github.com/yt-dlp/yt-dlp/issues/6734)) by [sian1468](https://github.com/sian1468)
- **livestream**: [Support videos with account id](https://github.com/yt-dlp/yt-dlp/commit/bfdf144c7e5d7a93fbfa9d8e65598c72bf2b542a)
([#6324](https://github.com/yt-dlp/yt-dlp/issues/6324)) by [theperfectpunk](https://github.com/theperfectpunk) - **medaltv**: [Fix clips](https://github.com/yt-dlp/yt-dlp/commit/1e3c2b6ec28d7ab5e31341fa93c47b65be4fbff4) ([#6502](https://github.com/yt-dlp/yt-dlp/issues/6502)) by [xenova](https://github.com/xenova) - **mediastream**: [Improve `WinSports` and embed extraction](https://github.com/yt-dlp/yt-dlp/commit/03025b6e105139d01cd415ddc51fd692957fd2ba) ([#6426](https://github.com/yt-dlp/yt-dlp/issues/6426)) by [bashonly](https://github.com/bashonly) - **mgtv**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/59d9fe08312bbb76ee26238d207a8ca35410a48d) ([#7234](https://github.com/yt-dlp/yt-dlp/issues/7234)) by [bashonly](https://github.com/bashonly) - **Mzaalo**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/dc3c44f349ba85af320e706e2a27ad81a78b1c6e) ([#7163](https://github.com/yt-dlp/yt-dlp/issues/7163)) by [ItzMaxTV](https://github.com/ItzMaxTV) - **nbc**: [Fix `NBCStations` direct mp4 formats](https://github.com/yt-dlp/yt-dlp/commit/9be0fe1fd967f62cbf3c60bd14e1021a70abc147) ([#6637](https://github.com/yt-dlp/yt-dlp/issues/6637)) by [bashonly](https://github.com/bashonly) - **nebula**: [Add `beta.nebula.tv`](https://github.com/yt-dlp/yt-dlp/commit/cbfe2e5cbe0f4649a91e323a82b8f5f774f36662) ([#6516](https://github.com/yt-dlp/yt-dlp/issues/6516)) by [unbeatable-101](https://github.com/unbeatable-101) - **nekohacker**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/489f51279d00318018478fd7461eddbe3b45297e) ([#7003](https://github.com/yt-dlp/yt-dlp/issues/7003)) by [hasezoey](https://github.com/hasezoey) - **nhk** - [Add `NhkRadiru` extractor](https://github.com/yt-dlp/yt-dlp/commit/8f0be90ecb3b8d862397177bb226f17b245ef933) ([#6819](https://github.com/yt-dlp/yt-dlp/issues/6819)) by [garret1317](https://github.com/garret1317) - [Fix API extraction](https://github.com/yt-dlp/yt-dlp/commit/f41b949a2ef646fbc36375febbe3f0c19d742c0f) ([#7180](https://github.com/yt-dlp/yt-dlp/issues/7180)) by [menschel](https://github.com/menschel), [sjthespian](https://github.com/sjthespian) - `NhkRadiruLive`: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/81c8b9bdd9841b72cbfc1bbff9dab5fb4aa038b0) ([#7332](https://github.com/yt-dlp/yt-dlp/issues/7332)) by [garret1317](https://github.com/garret1317) - **niconico** - [Download comments from the new endpoint](https://github.com/yt-dlp/yt-dlp/commit/52ecc33e221f7de7eb6fed6c22489f0c5fdd2c6d) ([#6773](https://github.com/yt-dlp/yt-dlp/issues/6773)) by [Lesmiscore](https://github.com/Lesmiscore) - live: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f8f9250fe280d37f0988646cd5cc0072f4d33a6d) ([#5764](https://github.com/yt-dlp/yt-dlp/issues/5764)) by [Lesmiscore](https://github.com/Lesmiscore) - series: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/c86e433c35fe5da6cb29f3539eef97497f84ed38) ([#6898](https://github.com/yt-dlp/yt-dlp/issues/6898)) by [sqrtNOT](https://github.com/sqrtNOT) - **nubilesporn**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/d4e6ef40772e0560a8ed33b844ef7549e86837be) ([#6231](https://github.com/yt-dlp/yt-dlp/issues/6231)) by [permunkle](https://github.com/permunkle) - **odnoklassniki**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/1a2eb5bda51d8b7a78a65acebf72a0dcf9da196b) ([#7217](https://github.com/yt-dlp/yt-dlp/issues/7217)) by [bashonly](https://github.com/bashonly) - **opencast** - [Add ltitools to 
`_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/3588be59cee429a0ab5c4ceb2f162298bb44147d) ([#6371](https://github.com/yt-dlp/yt-dlp/issues/6371)) by [C0D3D3V](https://github.com/C0D3D3V) - [Fix format bug](https://github.com/yt-dlp/yt-dlp/commit/89dbf0848370deaa55af88c3593a2a264124caf5) ([#6512](https://github.com/yt-dlp/yt-dlp/issues/6512)) by [C0D3D3V](https://github.com/C0D3D3V) - **owncloud**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c6d4b82a8b8bce59b1c9ce5e6d349ea428dac0a7) ([#6533](https://github.com/yt-dlp/yt-dlp/issues/6533)) by [C0D3D3V](https://github.com/C0D3D3V) - **Parler**: [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/80ea6d3dea8483cddd39fc89b5ee1fc06670c33c) ([#6446](https://github.com/yt-dlp/yt-dlp/issues/6446)) by [JChris246](https://github.com/JChris246) - **pgatour**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/3ae182ad89e1427ff7b1684d6a44ff93fa857a0c) ([#6613](https://github.com/yt-dlp/yt-dlp/issues/6613)) by [bashonly](https://github.com/bashonly) - **playsuisse**: [Support new url format](https://github.com/yt-dlp/yt-dlp/commit/94627c5dde12a72766bdba36e056916c29c40ed1) ([#6528](https://github.com/yt-dlp/yt-dlp/issues/6528)) by [sbor23](https://github.com/sbor23) - **polskieradio**: [Improve extractors](https://github.com/yt-dlp/yt-dlp/commit/738c90a463257634455ada3e5c18b714c531dede) ([#5948](https://github.com/yt-dlp/yt-dlp/issues/5948)) by [selfisekai](https://github.com/selfisekai) - **pornez**: [Support new URL formats](https://github.com/yt-dlp/yt-dlp/commit/cbdf9408e6f1e35e98fd6477b3d6902df5b8a47f) ([#6792](https://github.com/yt-dlp/yt-dlp/issues/6792)) by [zhgwn](https://github.com/zhgwn) - **pornhub**: [Set access cookies to fix extraction](https://github.com/yt-dlp/yt-dlp/commit/62beefa818c75c20b6941389bb197051554a5d41) ([#6685](https://github.com/yt-dlp/yt-dlp/issues/6685)) by [arobase-che](https://github.com/arobase-che), [Schmoaaaaah](https://github.com/Schmoaaaaah) - **rai**: [Rewrite extractors](https://github.com/yt-dlp/yt-dlp/commit/c6d3f81a4077aaf9cffc6aa2d0dec92f38e74bb0) ([#5940](https://github.com/yt-dlp/yt-dlp/issues/5940)) by [danog](https://github.com/danog), [nixxo](https://github.com/nixxo) - **recurbate**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c2502cfed91415c7ccfff925fd3404d230046484) ([#6297](https://github.com/yt-dlp/yt-dlp/issues/6297)) by [mrscrapy](https://github.com/mrscrapy) - **reddit** - [Add login support](https://github.com/yt-dlp/yt-dlp/commit/4d9280c9c853733534dda60486fa949bcca36c9e) ([#6950](https://github.com/yt-dlp/yt-dlp/issues/6950)) by [bashonly](https://github.com/bashonly) - [Support cookies and short URLs](https://github.com/yt-dlp/yt-dlp/commit/7a6f6f24592a8065376f11a58e44878807732cf6) ([#6825](https://github.com/yt-dlp/yt-dlp/issues/6825)) by [bashonly](https://github.com/bashonly) - **rokfin**: [Re-construct manifest url](https://github.com/yt-dlp/yt-dlp/commit/7a6c8a0807941dd24fbf0d6172e811884f98e027) ([#6507](https://github.com/yt-dlp/yt-dlp/issues/6507)) by [vampirefrog](https://github.com/vampirefrog) - **rottentomatoes**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/2d306c03d6f2697fcbabb7da35aa62cc078359d3) ([#6844](https://github.com/yt-dlp/yt-dlp/issues/6844)) by [JChris246](https://github.com/JChris246) - **rozhlas** - [Extract manifest formats](https://github.com/yt-dlp/yt-dlp/commit/e4cf7741f9302b3faa092962f2895b55cb3d89bb) ([#6590](https://github.com/yt-dlp/yt-dlp/issues/6590)) by [bashonly](https://github.com/bashonly) - 
`MujRozhlas`: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/c2b801fea59628d5c873e06a0727fbf2051bbd1f) ([#7129](https://github.com/yt-dlp/yt-dlp/issues/7129)) by [stanoarn](https://github.com/stanoarn) - **rtvc**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/9b30cd3dfce83c2f0201b28a7a3ef44ab9722664) ([#6578](https://github.com/yt-dlp/yt-dlp/issues/6578)) by [elyse0](https://github.com/elyse0) - **rumble** - [Detect timeline format](https://github.com/yt-dlp/yt-dlp/commit/78bc1868ff3352108ab2911033d1ac67a55f151e) by [pukkandan](https://github.com/pukkandan) - [Fix videos without quality selection](https://github.com/yt-dlp/yt-dlp/commit/6994afc030d2a786d8032075ed71a14d7eac5a4f) by [pukkandan](https://github.com/pukkandan) - **sbs**: [Overhaul extractor for new API](https://github.com/yt-dlp/yt-dlp/commit/6a765f135ccb654861336ea27a2c1c24ea8e286f) ([#6839](https://github.com/yt-dlp/yt-dlp/issues/6839)) by [bashonly](https://github.com/bashonly), [dirkf](https://github.com/dirkf), [vidiot720](https://github.com/vidiot720) - **shemaroome**: [Pass `stream_key` header to downloader](https://github.com/yt-dlp/yt-dlp/commit/7bc92517463f5766e9d9b92c3823b5cf403c0e3d) ([#7224](https://github.com/yt-dlp/yt-dlp/issues/7224)) by [bashonly](https://github.com/bashonly) - **sonyliv**: [Fix login with token](https://github.com/yt-dlp/yt-dlp/commit/4815d35c191e7d375b94492a6486dd2ba43a8954) ([#7223](https://github.com/yt-dlp/yt-dlp/issues/7223)) by [bashonly](https://github.com/bashonly) - **stageplus**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/e5265dc6517478e589ee3c1ff0cb19bdf4e35ce1) ([#6838](https://github.com/yt-dlp/yt-dlp/issues/6838)) by [bashonly](https://github.com/bashonly) - **stripchat**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/f9213f8a2d7ba46b912afe1dd3ce6bb700a33d72) ([#7306](https://github.com/yt-dlp/yt-dlp/issues/7306)) by [foreignBlade](https://github.com/foreignBlade) - **substack**: [Fix extraction](https://github.com/yt-dlp/yt-dlp/commit/12037d8b0a578fcc78a5c8f98964e48ee6060e25) ([#7218](https://github.com/yt-dlp/yt-dlp/issues/7218)) by [bashonly](https://github.com/bashonly) - **sverigesradio**: [Support slug URLs](https://github.com/yt-dlp/yt-dlp/commit/5ee9a7d6e18ceea956e831994cf11c423979354f) ([#7220](https://github.com/yt-dlp/yt-dlp/issues/7220)) by [bashonly](https://github.com/bashonly) - **tagesschau**: [Fix single audio urls](https://github.com/yt-dlp/yt-dlp/commit/af7585c824a1e405bd8afa46d87b4be322edc93c) ([#6626](https://github.com/yt-dlp/yt-dlp/issues/6626)) by [flashdagger](https://github.com/flashdagger) - **teamcoco**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/c459d45dd4d417fb80a52e1a04e607776a44baa4) ([#6437](https://github.com/yt-dlp/yt-dlp/issues/6437)) by [bashonly](https://github.com/bashonly) - **telecaribe**: [Expand livestream support](https://github.com/yt-dlp/yt-dlp/commit/69b2f838d3d3e37dc17367ef64d978db1bea45cf) ([#6601](https://github.com/yt-dlp/yt-dlp/issues/6601)) by [bashonly](https://github.com/bashonly) - **tencent**: [Fix fatal metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/971d901d129403e875a04dd92109507a03fbc070) ([#7219](https://github.com/yt-dlp/yt-dlp/issues/7219)) by [bashonly](https://github.com/bashonly) - **thesun**: [Update `_VALID_URL`](https://github.com/yt-dlp/yt-dlp/commit/0181b9a1b31db3fde943f7cd3fe9662f23bff292) ([#6522](https://github.com/yt-dlp/yt-dlp/issues/6522)) by [hatienl0i261299](https://github.com/hatienl0i261299) - **tiktok** - [Extract 1080p adaptive 
formats](https://github.com/yt-dlp/yt-dlp/commit/c2a1bdb00931969193f2a31ea27b9c66a07aaec2) ([#7228](https://github.com/yt-dlp/yt-dlp/issues/7228)) by [bashonly](https://github.com/bashonly) - [Fix and improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/925936908a3c3ee0e508621db14696b9f6a8b563) ([#6777](https://github.com/yt-dlp/yt-dlp/issues/6777)) by [bashonly](https://github.com/bashonly) - [Fix mp3 formats](https://github.com/yt-dlp/yt-dlp/commit/8ceb07e870424c219dced8f4348729553f05c5cc) ([#6615](https://github.com/yt-dlp/yt-dlp/issues/6615)) by [bashonly](https://github.com/bashonly) - [Fix resolution extraction](https://github.com/yt-dlp/yt-dlp/commit/ab6057ec80aa75db6303b8206916d00c376c622c) ([#7237](https://github.com/yt-dlp/yt-dlp/issues/7237)) by [puc9](https://github.com/puc9) - [Improve `TikTokLive` extractor](https://github.com/yt-dlp/yt-dlp/commit/216bcb66d7dce0762767d751dad10650cb57da9d) ([#6520](https://github.com/yt-dlp/yt-dlp/issues/6520)) by [bashonly](https://github.com/bashonly) - **triller**: [Support short URLs, detect removed videos](https://github.com/yt-dlp/yt-dlp/commit/33b737bedf8383c0d00d4e1d06a5273dcdfdb756) ([#6636](https://github.com/yt-dlp/yt-dlp/issues/6636)) by [bashonly](https://github.com/bashonly) - **tv4**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/125ffaa1737dd04716f2f6fbb0595ad3eb7a4b1c) ([#5649](https://github.com/yt-dlp/yt-dlp/issues/5649)) by [dirkf](https://github.com/dirkf), [TxI5](https://github.com/TxI5) - **tvp**: [Use new API](https://github.com/yt-dlp/yt-dlp/commit/0c7ce146e4d2a84e656d78f6857952bfd25ab389) ([#6989](https://github.com/yt-dlp/yt-dlp/issues/6989)) by [selfisekai](https://github.com/selfisekai) - **tvplay**: [Remove outdated domains](https://github.com/yt-dlp/yt-dlp/commit/937264419f9bf375d5656785ae6e53282587c15d) ([#7106](https://github.com/yt-dlp/yt-dlp/issues/7106)) by [ivanskodje](https://github.com/ivanskodje) - **twitch** - [Extract original size thumbnail](https://github.com/yt-dlp/yt-dlp/commit/80b732b7a9585b2a61e456dc0d2d014a439cbaee) ([#6629](https://github.com/yt-dlp/yt-dlp/issues/6629)) by [JC-Chung](https://github.com/JC-Chung) - [Fix `is_live`](https://github.com/yt-dlp/yt-dlp/commit/0551511b45f7847f40e4314aa9e624e80d086539) ([#6500](https://github.com/yt-dlp/yt-dlp/issues/6500)) by [elyse0](https://github.com/elyse0) - [Support mobile clips](https://github.com/yt-dlp/yt-dlp/commit/02312c03cf53eb1da24c9ad022ee79af26060733) ([#6699](https://github.com/yt-dlp/yt-dlp/issues/6699)) by [bepvte](https://github.com/bepvte) - [Update `_CLIENT_ID` and add extractor-arg](https://github.com/yt-dlp/yt-dlp/commit/01231feb142e80828985aabdec04ac608e3d43e2) ([#7200](https://github.com/yt-dlp/yt-dlp/issues/7200)) by [bashonly](https://github.com/bashonly) - vod: [Support links from schedule tab](https://github.com/yt-dlp/yt-dlp/commit/dbce5afa6bb61f6272ade613f2e9a3d66b88c7ea) ([#7071](https://github.com/yt-dlp/yt-dlp/issues/7071)) by [falbrechtskirchinger](https://github.com/falbrechtskirchinger) - **twitter** - [Add login support](https://github.com/yt-dlp/yt-dlp/commit/d1795f4a6af99c976c9d3ea2dabe5cf4f8965d3c) ([#7258](https://github.com/yt-dlp/yt-dlp/issues/7258)) by [bashonly](https://github.com/bashonly) - [Default to GraphQL, handle auth errors](https://github.com/yt-dlp/yt-dlp/commit/147e62fc584c3ea6fdb09bb7a47905df68553a22) ([#6957](https://github.com/yt-dlp/yt-dlp/issues/6957)) by [bashonly](https://github.com/bashonly) - spaces: [Add 
`release_timestamp`](https://github.com/yt-dlp/yt-dlp/commit/1c16d9df5330819cc79ad588b24aa5b72765c168) ([#7186](https://github.com/yt-dlp/yt-dlp/issues/7186)) by [CeruleanSky](https://github.com/CeruleanSky) - **urplay**: [Extract all subtitles](https://github.com/yt-dlp/yt-dlp/commit/7bcd4813215ac98daa4949af2ffc677c78307a38) ([#7309](https://github.com/yt-dlp/yt-dlp/issues/7309)) by [hoaluvn](https://github.com/hoaluvn) - **voot**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/4f7b11cc1c1cebf598107e00cd7295588ed484da) ([#7227](https://github.com/yt-dlp/yt-dlp/issues/7227)) by [bashonly](https://github.com/bashonly) - **vrt**: [Overhaul extractors](https://github.com/yt-dlp/yt-dlp/commit/1a7dcca378e80a387923ee05c250d8ba122441c6) ([#6244](https://github.com/yt-dlp/yt-dlp/issues/6244)) by [bashonly](https://github.com/bashonly), [bergoid](https://github.com/bergoid), [jeroenj](https://github.com/jeroenj) - **weverse**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/b844a3f8b16500663e7ab6c6ec061cc9b30f71ac) ([#6711](https://github.com/yt-dlp/yt-dlp/issues/6711)) by [bashonly](https://github.com/bashonly) (With fixes in [fd5d93f](https://github.com/yt-dlp/yt-dlp/commit/fd5d93f7040f9776fd541f4e4079dad7d3b3fb4f)) - **wevidi**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1ea15603d852971ed7d92f4de12808b27b3d9370) ([#6868](https://github.com/yt-dlp/yt-dlp/issues/6868)) by [truedread](https://github.com/truedread) - **weyyak**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6dc00acf0f1f1107a626c21befd1691403e6aeeb) ([#7124](https://github.com/yt-dlp/yt-dlp/issues/7124)) by [ItzMaxTV](https://github.com/ItzMaxTV) - **whyp**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/2c566ed14101673c651c08c306c30fa5b4010b85) ([#6803](https://github.com/yt-dlp/yt-dlp/issues/6803)) by [CoryTibbettsDev](https://github.com/CoryTibbettsDev) - **wrestleuniverse** - [Fix cookies support](https://github.com/yt-dlp/yt-dlp/commit/c8561c6d03f025268d6d3972abeb47987c8d7cbb) by [bashonly](https://github.com/bashonly) - [Fix extraction, add login](https://github.com/yt-dlp/yt-dlp/commit/ef8fb7f029b816dfc95600727d84400591a3b5c5) ([#6982](https://github.com/yt-dlp/yt-dlp/issues/6982)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) - **wykop**: [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/aed945e1b9b7d3af2a907e1a12e6508cc81d6a20) ([#6140](https://github.com/yt-dlp/yt-dlp/issues/6140)) by [selfisekai](https://github.com/selfisekai) - **ximalaya**: [Sort playlist entries](https://github.com/yt-dlp/yt-dlp/commit/8790ea7b2536332777bce68590386b1aa935fac7) ([#7292](https://github.com/yt-dlp/yt-dlp/issues/7292)) by [linsui](https://github.com/linsui) - **YahooGyaOIE, YahooGyaOPlayerIE**: [Delete extractors due to website close](https://github.com/yt-dlp/yt-dlp/commit/68be95bd0ca3f76aa63c9812935bd826b3a42e53) ([#6218](https://github.com/yt-dlp/yt-dlp/issues/6218)) by [Lesmiscore](https://github.com/Lesmiscore) - **yappy**: YappyProfile: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/6f69101dc912690338d32e2aab085c32e44eba3f) ([#7346](https://github.com/yt-dlp/yt-dlp/issues/7346)) by [7vlad7](https://github.com/7vlad7) - **youku**: [Improve error message](https://github.com/yt-dlp/yt-dlp/commit/ef0848abd425dfda6db62baa8d72897eefb0007f) ([#6690](https://github.com/yt-dlp/yt-dlp/issues/6690)) by [carusocr](https://github.com/carusocr) - **youporn**: [Extract m3u8 
formats](https://github.com/yt-dlp/yt-dlp/commit/ddae33754ae1f32dd9c64cf895c47d20f6b5f336) by [pukkandan](https://github.com/pukkandan) - **youtube** - [Add client name to `format_note` when `-v`](https://github.com/yt-dlp/yt-dlp/commit/c795c39f27244cbce846067891827e4847036441) ([#6254](https://github.com/yt-dlp/yt-dlp/issues/6254)) by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan) - [Add extractor-arg `include_duplicate_formats`](https://github.com/yt-dlp/yt-dlp/commit/86cb922118b236306310a72657f70426c20e28bb) by [pukkandan](https://github.com/pukkandan) - [Bypass throttling for `-f17`](https://github.com/yt-dlp/yt-dlp/commit/c9abebb851e6188cb34b9eb744c1863dd46af919) by [pukkandan](https://github.com/pukkandan) - [Construct fragment list lazily](https://github.com/yt-dlp/yt-dlp/commit/2a23d92d9ec44a0168079e38bcf3d383e5c4c7bb) by [pukkandan](https://github.com/pukkandan) (With fixes in [e389d17](https://github.com/yt-dlp/yt-dlp/commit/e389d172b6f42e4f332ae679dc48543fb7b9b61d)) - [Define strict uploader metadata mapping](https://github.com/yt-dlp/yt-dlp/commit/7666b93604b97e9ada981c6b04ccf5605dd1bd44) ([#6384](https://github.com/yt-dlp/yt-dlp/issues/6384)) by [coletdjnz](https://github.com/coletdjnz) - [Determine audio language using automatic captions](https://github.com/yt-dlp/yt-dlp/commit/ff9b0e071ffae5543cc309e6f9e647ac51e5846e) by [pukkandan](https://github.com/pukkandan) - [Extract `channel_is_verified`](https://github.com/yt-dlp/yt-dlp/commit/8213ce28a485e200f6a7e1af1434a987c8e702bd) ([#7213](https://github.com/yt-dlp/yt-dlp/issues/7213)) by [coletdjnz](https://github.com/coletdjnz) - [Extract `heatmap` data](https://github.com/yt-dlp/yt-dlp/commit/5caf30dbc34f10b0be60676fece635b5c59f0d72) ([#7100](https://github.com/yt-dlp/yt-dlp/issues/7100)) by [tntmod54321](https://github.com/tntmod54321) - [Extract more metadata for comments](https://github.com/yt-dlp/yt-dlp/commit/c35448b7b14113b35c4415dbfbf488c4731f006f) ([#7179](https://github.com/yt-dlp/yt-dlp/issues/7179)) by [coletdjnz](https://github.com/coletdjnz) - [Extract uploader metadata for feed/playlist items](https://github.com/yt-dlp/yt-dlp/commit/93e12ed76ef49252dc6869b59d21d0777e5e11af) by [coletdjnz](https://github.com/coletdjnz) - [Fix comment loop detection for pinned comments](https://github.com/yt-dlp/yt-dlp/commit/141a8dff98874a426d7fbe772e0a8421bb42656f) ([#6714](https://github.com/yt-dlp/yt-dlp/issues/6714)) by [coletdjnz](https://github.com/coletdjnz) - [Fix continuation loop with no comments](https://github.com/yt-dlp/yt-dlp/commit/18f8fba7c89a87f99cc3313a1795848867e84fff) ([#7148](https://github.com/yt-dlp/yt-dlp/issues/7148)) by [coletdjnz](https://github.com/coletdjnz) - [Fix parsing `comment_count`](https://github.com/yt-dlp/yt-dlp/commit/071670cbeaa01ddf2cc20a95ae6da25f8f086431) ([#6523](https://github.com/yt-dlp/yt-dlp/issues/6523)) by [nick-cd](https://github.com/nick-cd) - [Handle incomplete initial data from watch page](https://github.com/yt-dlp/yt-dlp/commit/607510b9f2f67bfe7d33d74031a5c1fe22a24862) ([#6510](https://github.com/yt-dlp/yt-dlp/issues/6510)) by [coletdjnz](https://github.com/coletdjnz) - [Ignore wrong fps of some formats](https://github.com/yt-dlp/yt-dlp/commit/97afb093d4cbe5df889145afa5f9ede4535e93e4) by [pukkandan](https://github.com/pukkandan) - [Misc cleanup](https://github.com/yt-dlp/yt-dlp/commit/14a14335b280766fbf5a469ae26836d6c1fe450a) by [coletdjnz](https://github.com/coletdjnz) - [Prioritize premium 
formats](https://github.com/yt-dlp/yt-dlp/commit/51a07b0dca4c079d58311c19b6d1c097c24bb021) by [pukkandan](https://github.com/pukkandan) - [Revert default formats to `https`](https://github.com/yt-dlp/yt-dlp/commit/c6786ff3baaf72a5baa4d56d34058e54cbcf8ceb) by [pukkandan](https://github.com/pukkandan) - [Support podcasts and releases tabs](https://github.com/yt-dlp/yt-dlp/commit/447afb9eaa65bc677e3245c83e53a8e69c174a3c) by [coletdjnz](https://github.com/coletdjnz) - [Support shorter relative time format](https://github.com/yt-dlp/yt-dlp/commit/2fb35f6004c7625f0dd493da4a5abf0690f7777c) ([#7191](https://github.com/yt-dlp/yt-dlp/issues/7191)) by [coletdjnz](https://github.com/coletdjnz) - music_search_url: [Extract title](https://github.com/yt-dlp/yt-dlp/commit/69a40e4a7f6caa5662527ebd2f3c4e8aa02857a2) ([#7102](https://github.com/yt-dlp/yt-dlp/issues/7102)) by [kangalio](https://github.com/kangalio) - **zaiko** - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/345b4c0aedd9d19898ce00d5cef35fe0d277a052) ([#7254](https://github.com/yt-dlp/yt-dlp/issues/7254)) by [c-basalt](https://github.com/c-basalt) - ZaikoETicket: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/5cc09c004bd5edbbada9b041c08a720cadc4f4df) ([#7347](https://github.com/yt-dlp/yt-dlp/issues/7347)) by [pzhlkj6612](https://github.com/pzhlkj6612) - **zdf**: [Fix formats extraction](https://github.com/yt-dlp/yt-dlp/commit/ee0ed0338df328cd986f97315c8162b5a151476d) by [bashonly](https://github.com/bashonly) - **zee5**: [Fix extraction of new content](https://github.com/yt-dlp/yt-dlp/commit/9d7fde89a40360396f0baa2ee8bf507f92108b32) ([#7280](https://github.com/yt-dlp/yt-dlp/issues/7280)) by [bashonly](https://github.com/bashonly) - **zingmp3**: [Fix and improve extractors](https://github.com/yt-dlp/yt-dlp/commit/17d7ca84ea723c20668bd9bfa938be7ea0e64f6b) ([#6367](https://github.com/yt-dlp/yt-dlp/issues/6367)) by [hatienl0i261299](https://github.com/hatienl0i261299) - **zoom** - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/79c77e85b70ae3b9942d5a88c14d021a9bd24222) ([#6741](https://github.com/yt-dlp/yt-dlp/issues/6741)) by [shreyasminocha](https://github.com/shreyasminocha) - [Fix share URL extraction](https://github.com/yt-dlp/yt-dlp/commit/90c1f5120694105496a6ad9e3ecfc6c25de6cae1) ([#6789](https://github.com/yt-dlp/yt-dlp/issues/6789)) by [bashonly](https://github.com/bashonly) #### Downloader changes - **curl**: [Fix progress reporting](https://github.com/yt-dlp/yt-dlp/commit/66aeaac9aa30b5959069ba84e53a5508232deb38) by [pukkandan](https://github.com/pukkandan) - **fragment**: [Do not sleep between fragments](https://github.com/yt-dlp/yt-dlp/commit/424f3bf03305088df6e01d62f7311be8601ad3f4) by [pukkandan](https://github.com/pukkandan) #### Postprocessor changes - [Fix chapters if duration is not extracted](https://github.com/yt-dlp/yt-dlp/commit/01ddec7e661bf90dc4c34e6924eb9d7629886cef) ([#6037](https://github.com/yt-dlp/yt-dlp/issues/6037)) by [bashonly](https://github.com/bashonly) - [Print newline for `--progress-template`](https://github.com/yt-dlp/yt-dlp/commit/13ff78095372fd98900a32572cf817994c07ccb5) by [pukkandan](https://github.com/pukkandan) - **EmbedThumbnail, FFmpegMetadata**: [Fix error on attaching thumbnails and info json for mkv/mka](https://github.com/yt-dlp/yt-dlp/commit/0f0875ed555514f32522a0f30554fb08825d5124) ([#6647](https://github.com/yt-dlp/yt-dlp/issues/6647)) by [Lesmiscore](https://github.com/Lesmiscore) - **FFmpegFixupM3u8PP**: [Check audio codec before 
fixup](https://github.com/yt-dlp/yt-dlp/commit/3f7e2bd80e3c5d8a1682f20a1b245fcd974f295d) ([#6778](https://github.com/yt-dlp/yt-dlp/issues/6778)) by [bashonly](https://github.com/bashonly) - **FixupDuplicateMoov**: [Fix bug in triggering](https://github.com/yt-dlp/yt-dlp/commit/26010b5cec50193b98ad7845d1d77450f9f14c2b) by [pukkandan](https://github.com/pukkandan) #### Misc. changes - [Add automatic duplicate issue detection](https://github.com/yt-dlp/yt-dlp/commit/15b2d3db1d40b0437fca79d8874d392aa54b3cdd) by [pukkandan](https://github.com/pukkandan) - **build** - [Fix macOS target](https://github.com/yt-dlp/yt-dlp/commit/44a79958f0b596ee71e1eb25f158610aada29d1b) by [Grub4K](https://github.com/Grub4K) - [Implement build verification using `--update-to`](https://github.com/yt-dlp/yt-dlp/commit/b73193c99aa23b135732408a5fcf655c68d731c6) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) - [Pin `pyinstaller` version for MacOS](https://github.com/yt-dlp/yt-dlp/commit/427a8fafbb0e18c28d0ed7960be838d7b26b88d3) by [pukkandan](https://github.com/pukkandan) - [Various build workflow improvements](https://github.com/yt-dlp/yt-dlp/commit/c4efa0aefec8daef1de62fd1693f13edf3c8b03c) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) - **cleanup** - Miscellaneous - [6f2287c](https://github.com/yt-dlp/yt-dlp/commit/6f2287cb18cbfb27518f068d868fa9390fee78ad) by [pukkandan](https://github.com/pukkandan) - [ad54c91](https://github.com/yt-dlp/yt-dlp/commit/ad54c9130e793ce433bf9da334fa80df9f3aee58) by [freezboltz](https://github.com/freezboltz), [mikf](https://github.com/mikf), [pukkandan](https://github.com/pukkandan) - **cleanup, utils**: [Split into submodules](https://github.com/yt-dlp/yt-dlp/commit/69bec6730ec9d724bcedeab199d9d684d61423ba) ([#7090](https://github.com/yt-dlp/yt-dlp/issues/7090)) by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan) - **cli_to_api**: [Add script](https://github.com/yt-dlp/yt-dlp/commit/46f1370e9af6f8af8762f67e27e5acb8f0c48a47) by [pukkandan](https://github.com/pukkandan) - **devscripts**: `make_changelog`: [Various improvements](https://github.com/yt-dlp/yt-dlp/commit/23c39a4beadee382060bb47fdaa21316ca707d38) by [Grub4K](https://github.com/Grub4K) - **docs**: [Misc improvements](https://github.com/yt-dlp/yt-dlp/commit/c8bc203fbf3bb09914e53f0833eed622ab7edbb9) by [pukkandan](https://github.com/pukkandan) ### 2023.03.04 #### Extractor changes - bilibili - [Fix for downloading wrong subtitles](https://github.com/yt-dlp/yt-dlp/commit/8a83baaf218ab89e6e7faa76b7c7be3a2ec19e3a) ([#6358](https://github.com/yt-dlp/yt-dlp/issues/6358)) by [LXYan2333](https://github.com/LXYan2333) - ESPNcricinfo - [Handle new URL pattern](https://github.com/yt-dlp/yt-dlp/commit/640c934823fc2d1ec77ec932566078014058635f) ([#6321](https://github.com/yt-dlp/yt-dlp/issues/6321)) by [venkata-krishnas](https://github.com/venkata-krishnas) - lefigaro - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/eb8fd6d044e8926532772b72be0645c6b8ecb3aa) ([#6309](https://github.com/yt-dlp/yt-dlp/issues/6309)) by [elyse0](https://github.com/elyse0) - lumni - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1f8489cccbdc6e96027ef527b88717458f0900e8) ([#6302](https://github.com/yt-dlp/yt-dlp/issues/6302)) by [carusocr](https://github.com/carusocr) - Prankcast - [Fix tags](https://github.com/yt-dlp/yt-dlp/commit/ed4cc4ea793314c50ae3f82e98248c1de1c25694) ([#6316](https://github.com/yt-dlp/yt-dlp/issues/6316)) by 
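The `cli_to_api` script translates a command line into the equivalent `YoutubeDL` API options. A minimal sketch of invoking it, assuming a source checkout of the repository; the flags shown are arbitrary examples and the exact output format is not reproduced here:

```sh
# Print the YoutubeDL params dict corresponding to these CLI options
# (run from the repository root)
python devscripts/cli_to_api.py -f bestvideo+bestaudio --embed-metadata
```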
### 2023.03.04

#### Extractor changes
- bilibili
    - [Fix for downloading wrong subtitles](https://github.com/yt-dlp/yt-dlp/commit/8a83baaf218ab89e6e7faa76b7c7be3a2ec19e3a) ([#6358](https://github.com/yt-dlp/yt-dlp/issues/6358)) by [LXYan2333](https://github.com/LXYan2333)
- ESPNcricinfo
    - [Handle new URL pattern](https://github.com/yt-dlp/yt-dlp/commit/640c934823fc2d1ec77ec932566078014058635f) ([#6321](https://github.com/yt-dlp/yt-dlp/issues/6321)) by [venkata-krishnas](https://github.com/venkata-krishnas)
- lefigaro
    - [Add extractors](https://github.com/yt-dlp/yt-dlp/commit/eb8fd6d044e8926532772b72be0645c6b8ecb3aa) ([#6309](https://github.com/yt-dlp/yt-dlp/issues/6309)) by [elyse0](https://github.com/elyse0)
- lumni
    - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/1f8489cccbdc6e96027ef527b88717458f0900e8) ([#6302](https://github.com/yt-dlp/yt-dlp/issues/6302)) by [carusocr](https://github.com/carusocr)
- Prankcast
    - [Fix tags](https://github.com/yt-dlp/yt-dlp/commit/ed4cc4ea793314c50ae3f82e98248c1de1c25694) ([#6316](https://github.com/yt-dlp/yt-dlp/issues/6316)) by [columndeeply](https://github.com/columndeeply)
- rutube
    - [Extract chapters from description](https://github.com/yt-dlp/yt-dlp/commit/22ccd5420b3eb0782776071f12cccd1fedaa1fd0) ([#6345](https://github.com/yt-dlp/yt-dlp/issues/6345)) by [mushbite](https://github.com/mushbite)
- SportDeutschland
    - [Rewrite extractor](https://github.com/yt-dlp/yt-dlp/commit/45db357289b4e1eec09093c8bc5446520378f426) by [pukkandan](https://github.com/pukkandan)
- telecaribe
    - [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/b40471282286bd2b09c485bf79afd271d229272c) ([#6311](https://github.com/yt-dlp/yt-dlp/issues/6311)) by [elyse0](https://github.com/elyse0)
- tubetugraz
    - [Support `--twofactor` (#6424)](https://github.com/yt-dlp/yt-dlp/commit/f44cb4e77bb9be8be291d02ab6f79dc0b4c0d4a1) ([#6427](https://github.com/yt-dlp/yt-dlp/issues/6427)) by [Ferdi265](https://github.com/Ferdi265)
- tunein
    - [Fix extractors](https://github.com/yt-dlp/yt-dlp/commit/46580ced56c90b559885aded6aa8f46f20a9cdce) ([#6310](https://github.com/yt-dlp/yt-dlp/issues/6310)) by [elyse0](https://github.com/elyse0)
- twitch
    - [Update for GraphQL API changes](https://github.com/yt-dlp/yt-dlp/commit/4a6272c6d1bff89969b67cd22b26ebe6d7e72279) ([#6318](https://github.com/yt-dlp/yt-dlp/issues/6318)) by [elyse0](https://github.com/elyse0)
- twitter
    - [Fix retweet extraction](https://github.com/yt-dlp/yt-dlp/commit/cf605226521e99c89fc8dff26a319025810e63a0) ([#6422](https://github.com/yt-dlp/yt-dlp/issues/6422)) by [selfisekai](https://github.com/selfisekai)
- xvideos
    - quickies: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/283a0b5bc511f3b350eead4488158f50c20ec526) ([#6414](https://github.com/yt-dlp/yt-dlp/issues/6414)) by [Yakabuff](https://github.com/Yakabuff)

#### Misc. changes
- build
    - [Fix publishing to PyPI and homebrew](https://github.com/yt-dlp/yt-dlp/commit/55676fe498345a389a2539d8baaba958d6d61c3e) by [bashonly](https://github.com/bashonly)
    - [Only archive if `vars.ARCHIVE_REPO` is set](https://github.com/yt-dlp/yt-dlp/commit/08ff6d59f97b5f5f0128f6bf6fbef56fd836cc52) by [Grub4K](https://github.com/Grub4K)
- cleanup
    - Miscellaneous: [392389b](https://github.com/yt-dlp/yt-dlp/commit/392389b7df7b818f794b231f14dc396d4875fbad) by [pukkandan](https://github.com/pukkandan)
- devscripts
    - `make_changelog`: [Stop at `Release ...` commit](https://github.com/yt-dlp/yt-dlp/commit/7accdd9845fe7ce9d0aa5a9d16faaa489c1294eb) by [pukkandan](https://github.com/pukkandan)

### 2023.03.03

#### Important changes
- **A new release type has been added!**
    * [`nightly`](https://github.com/yt-dlp/yt-dlp/releases/tag/nightly) builds will be made after each push, containing the latest fixes (but also possibly bugs).
    * When using `--update`/`-U`, a release binary will only update to its current channel (either `stable` or `nightly`).
    * The `--update-to` option has been added allowing the user more control over program upgrades (or downgrades).
    * `--update-to` can change the release channel (`stable`, `nightly`) and also upgrade or downgrade to specific tags.
    * **Usage**: `--update-to CHANNEL`, `--update-to TAG`, `--update-to CHANNEL@TAG`
- **YouTube throttling fixes!**
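To make the new update options concrete, a quick sketch of the commands described above (the tag shown is this release's own; any released tag should work the same way):

```sh
yt-dlp --update-to nightly            # move this binary to the nightly channel
yt-dlp --update-to 2023.03.03         # upgrade or downgrade to a specific tag
yt-dlp --update-to stable@2023.03.03  # CHANNEL@TAG: pin a tag on a given channel
```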
#### Core changes
- [Add option `--break-match-filters`](https://github.com/yt-dlp/yt-dlp/commit/fe2ce85aff0aa03735fc0152bb8cb9c3d4ef0753) by [pukkandan](https://github.com/pukkandan)
- [Fix `--break-on-existing` with `--lazy-playlist`](https://github.com/yt-dlp/yt-dlp/commit/d21056f4cf0a1623daa107f9181074f5725ac436) by [pukkandan](https://github.com/pukkandan)
- dependencies
    - [Simplify `Cryptodome`](https://github.com/yt-dlp/yt-dlp/commit/65f6e807804d2af5e00f2aecd72bfc43af19324a) by [pukkandan](https://github.com/pukkandan)
- jsinterp
    - [Handle `Date` at epoch 0](https://github.com/yt-dlp/yt-dlp/commit/9acf1ee25f7ad3920ede574a9de95b8c18626af4) by [pukkandan](https://github.com/pukkandan)
- plugins
    - [Don't look in `.egg` directories](https://github.com/yt-dlp/yt-dlp/commit/b059188383eee4fa336ef728dda3ff4bb7335625) by [pukkandan](https://github.com/pukkandan)
- update
    - [Add option `--update-to`, including to nightly](https://github.com/yt-dlp/yt-dlp/commit/77df20f14cc9ed41dfe3a1fe2d77fd27f5365a94) ([#6220](https://github.com/yt-dlp/yt-dlp/issues/6220)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan)
- utils
    - `LenientJSONDecoder`: [Parse unclosed objects](https://github.com/yt-dlp/yt-dlp/commit/cc09083636ce21e58ff74f45eac2dbda507462b0) by [pukkandan](https://github.com/pukkandan)
    - `Popen`: [Shim undocumented `text_mode` property](https://github.com/yt-dlp/yt-dlp/commit/da8e2912b165005f76779a115a071cd6132ceedf) by [Grub4K](https://github.com/Grub4K)

#### Extractor changes
- [Fix DRM detection in m3u8](https://github.com/yt-dlp/yt-dlp/commit/43a3eaf96393b712d60cbcf5c6cb1e90ed7f42f5) by [pukkandan](https://github.com/pukkandan)
- generic
    - [Detect manifest links via extension](https://github.com/yt-dlp/yt-dlp/commit/b38cae49e6f4849c8ee2a774bdc3c1c647ae5f0e) by [bashonly](https://github.com/bashonly)
    - [Handle basic-auth when checking redirects](https://github.com/yt-dlp/yt-dlp/commit/8e9fe43cd393e69fa49b3d842aa3180c1d105b8f) by [pukkandan](https://github.com/pukkandan)
- GoogleDrive
    - [Fix some audio](https://github.com/yt-dlp/yt-dlp/commit/4d248e29d20d983ededab0b03d4fe69dff9eb4ed) by [pukkandan](https://github.com/pukkandan)
- iprima
    - [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/9fddc12ab022a31754e0eaa358fc4e1dfa974587) ([#6291](https://github.com/yt-dlp/yt-dlp/issues/6291)) by [std-move](https://github.com/std-move)
- mediastream
    - [Improve WinSports support](https://github.com/yt-dlp/yt-dlp/commit/2d5a8c5db2bd4ff1c2e45e00cd890a10f8ffca9e) ([#6401](https://github.com/yt-dlp/yt-dlp/issues/6401)) by [bashonly](https://github.com/bashonly)
- ntvru
    - [Extract HLS and DASH formats](https://github.com/yt-dlp/yt-dlp/commit/77d6d136468d0c23c8e79bc937898747804f585a) ([#6403](https://github.com/yt-dlp/yt-dlp/issues/6403)) by [bashonly](https://github.com/bashonly)
- tencent
    - [Add more formats and info](https://github.com/yt-dlp/yt-dlp/commit/18d295c9e0f95adc179eef345b7af64d6372db78) ([#5950](https://github.com/yt-dlp/yt-dlp/issues/5950)) by [Hill-98](https://github.com/Hill-98)
- yle_areena
    - [Extract non-Kaltura videos](https://github.com/yt-dlp/yt-dlp/commit/40d77d89027cd0e0ce31d22aec81db3e1d433900) ([#6402](https://github.com/yt-dlp/yt-dlp/issues/6402)) by [bashonly](https://github.com/bashonly)
- youtube
    - [Construct dash formats with `range` query](https://github.com/yt-dlp/yt-dlp/commit/5038f6d713303e0967d002216e7a88652401c22a) by [pukkandan](https://github.com/pukkandan) (With fixes in [f34804b](https://github.com/yt-dlp/yt-dlp/commit/f34804b2f920f62a6e893a14a9e2a2144b14dd23) by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz))
    - [Detect and break on looping comments](https://github.com/yt-dlp/yt-dlp/commit/7f51861b1820c37b157a239b1fe30628d907c034) ([#6301](https://github.com/yt-dlp/yt-dlp/issues/6301)) by [coletdjnz](https://github.com/coletdjnz)
    - [Extract channel `view_count` when `/about` tab is passed](https://github.com/yt-dlp/yt-dlp/commit/31e183557fcd1b937582f9429f29207c1261f501) by [pukkandan](https://github.com/pukkandan)

#### Misc. changes
- build
    - [Add `cffi` as a dependency for `yt_dlp_linux`](https://github.com/yt-dlp/yt-dlp/commit/776d1c3f0c9b00399896dd2e40e78e9a43218109) by [bashonly](https://github.com/bashonly)
    - [Automated builds and nightly releases](https://github.com/yt-dlp/yt-dlp/commit/29cb20bd563c02671b31dd840139e93dd37150a1) ([#6220](https://github.com/yt-dlp/yt-dlp/issues/6220)) by [bashonly](https://github.com/bashonly), [Grub4K](https://github.com/Grub4K) (With fixes in [bfc861a](https://github.com/yt-dlp/yt-dlp/commit/bfc861a91ee65c9b0ac169754f512e052c6827cf) by [pukkandan](https://github.com/pukkandan))
    - [Sign SHA files and release public key](https://github.com/yt-dlp/yt-dlp/commit/12647e03d417feaa9ea6a458bea5ebd747494a53) by [Grub4K](https://github.com/Grub4K)
- cleanup
    - [Fix `Changelog`](https://github.com/yt-dlp/yt-dlp/commit/17ca19ab60a6a13eb8a629c51442b5248b0d8394) by [pukkandan](https://github.com/pukkandan)
    - jsinterp: [Give functions names to help debugging](https://github.com/yt-dlp/yt-dlp/commit/b2e0343ba0fc5d8702e90f6ba2b71358e2677e0b) by [pukkandan](https://github.com/pukkandan)
    - Miscellaneous: [4815bbf](https://github.com/yt-dlp/yt-dlp/commit/4815bbfc41cf641e4a0650289dbff968cb3bde76), [5b28cef](https://github.com/yt-dlp/yt-dlp/commit/5b28cef72db3b531680d89c121631c73ae05354f) by [pukkandan](https://github.com/pukkandan)
- devscripts
    - [Script to generate changelog](https://github.com/yt-dlp/yt-dlp/commit/d400e261cf029a3f20d364113b14de973be75404) ([#6220](https://github.com/yt-dlp/yt-dlp/issues/6220)) by [Grub4K](https://github.com/Grub4K) (With fixes in [9344964](https://github.com/yt-dlp/yt-dlp/commit/93449642815a6973a4b09b289982ca7e1f961b5f))

### 2023.02.17

* Merge youtube-dl: Upto [commit/2dd6c6e](https://github.com/ytdl-org/youtube-dl/commit/2dd6c6e)
* Fix `--concat-playlist`
* Imply `--no-progress` when `--print`
* Improve default subtitle language selection by [sdht0](https://github.com/sdht0)
* Make `title` completely non-fatal
* Sanitize formats before sorting by [pukkandan](https://github.com/pukkandan)
* Support module level `__bool__` and `property`
* [dependencies] Standardize `Cryptodome` imports
* [hls] Allow extractors to provide AES key by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
* [ExtractAudio] Handle outtmpl without ext by [carusocr](https://github.com/carusocr)
* [extractor/common] Fix `_search_nuxt_data` by [LowSuggestion912](https://github.com/LowSuggestion912)
* [extractor/generic] Avoid catastrophic backtracking in KVS regex by [bashonly](https://github.com/bashonly)
* [jsinterp] Support `if` statements
* [plugins] Fix zip search paths
* [utils] `traverse_obj`: Various improvements by [Grub4K](https://github.com/Grub4K) (see the example at the end of this section)
* [utils] `traverse_obj`: Fix more bugs
* [utils] `traverse_obj`: Fix several behavioral problems by [Grub4K](https://github.com/Grub4K)
* [utils] Don't use Content-length with encoding by [felixonmars](https://github.com/felixonmars)
* [utils] Fix `time_seconds` to use the provided TZ by [Grub4K](https://github.com/Grub4K), [Lesmiscore](https://github.com/Lesmiscore)
* [utils] Fix race condition in `make_dir` by [aionescu](https://github.com/aionescu)
* [utils] Use local kernel32 for file locking on Windows by [Grub4K](https://github.com/Grub4K)
* [compat_utils] Improve `passthrough_module`
* [compat_utils] Simplify `EnhancedModule`
* [build] Update pyinstaller
* [pyinst] Fix for pyinstaller 5.8
* [devscripts] Provide `pyinstaller` hooks
* [devscripts/pyinstaller] Analyze sub-modules of `Cryptodome`
* [cleanup] Misc fixes and cleanup
* [extractor/anchorfm] Add episode extractor by [HobbyistDev](https://github.com/HobbyistDev), [bashonly](https://github.com/bashonly)
* [extractor/boxcast] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/ebay] Add extractor by [JChris246](https://github.com/JChris246)
* [extractor/hypergryph] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [bashonly](https://github.com/bashonly)
* [extractor/NZOnScreen] Add extractor by [gregsadetsky](https://github.com/gregsadetsky), [pukkandan](https://github.com/pukkandan)
* [extractor/rozhlas] Add extractor RozhlasVltavaIE by [amra](https://github.com/amra)
* [extractor/tempo] Add IVXPlayer extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/txxx] Add extractors by [chio0hai](https://github.com/chio0hai)
* [extractor/vocaroo] Add extractor by [SuperSonicHub1](https://github.com/SuperSonicHub1), [qbnu](https://github.com/qbnu)
* [extractor/wrestleuniverse] Add extractors by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
* [extractor/yappy] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [dirkf](https://github.com/dirkf)
* [extractor/youtube] **Fix `uploader_id` extraction** by [bashonly](https://github.com/bashonly)
* [extractor/youtube] Add hyperpipe instances by [Generator](https://github.com/Generator)
* [extractor/youtube] Handle `consent.youtube`
* [extractor/youtube] Support `/live/` URL
* [extractor/youtube] Update invidious and piped instances by [rohieb](https://github.com/rohieb)
* [extractor/91porn] Fix title and comment extraction by [pmitchell86](https://github.com/pmitchell86)
* [extractor/AbemaTV] Cache user token whenever appropriate by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/bfmtv] Support `rmc` prefix by [carusocr](https://github.com/carusocr)
* [extractor/biliintl] Add intro and ending chapters by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/clyp] Support `wav` by [qulaz](https://github.com/qulaz)
* [extractor/crunchyroll] Add intro chapter by [ByteDream](https://github.com/ByteDream)
* [extractor/crunchyroll] Better message for premium videos
* [extractor/crunchyroll] Fix incorrect premium-only error by [Grub4K](https://github.com/Grub4K)
* [extractor/DouyuTV] Use new API by [hatienl0i261299](https://github.com/hatienl0i261299)
* [extractor/embedly] Embedded links may be for other extractors
* [extractor/freesound] Workaround invalid URL in webpage by [rebane2001](https://github.com/rebane2001)
* [extractor/GoPlay] Use new API by [jeroenj](https://github.com/jeroenj)
* [extractor/Hidive] Fix subtitles and age-restriction by [chexxor](https://github.com/chexxor)
* [extractor/huya] Support HD streams by [felixonmars](https://github.com/felixonmars)
* [extractor/moviepilot] Fix extractor by [panatexxa](https://github.com/panatexxa)
* [extractor/nbc] Fix `NBC` and `NBCStations` extractors by [bashonly](https://github.com/bashonly)
* [extractor/nbc] Fix XML parsing by [bashonly](https://github.com/bashonly)
* [extractor/nebula] Remove broken cookie support by [hheimbuerger](https://github.com/hheimbuerger)
* [extractor/nfl] Add `NFLPlus` extractors by [bashonly](https://github.com/bashonly)
* [extractor/niconico] Add support for like history by [Matumo](https://github.com/Matumo), [pukkandan](https://github.com/pukkandan)
* [extractor/nitter] Update instance list by [OIRNOIR](https://github.com/OIRNOIR)
* [extractor/npo] Fix extractor and add HD support by [seproDev](https://github.com/seproDev)
* [extractor/odkmedia] Add `OnDemandChinaEpisodeIE` by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan)
* [extractor/pornez] Handle relative URLs in iframe by [JChris246](https://github.com/JChris246)
* [extractor/radiko] Fix format sorting for Time Free by [road-master](https://github.com/road-master)
* [extractor/rcs] Fix extractors by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan)
* [extractor/reddit] Support user posts by [OMEGARAZER](https://github.com/OMEGARAZER)
* [extractor/rumble] Fix format sorting by [pukkandan](https://github.com/pukkandan)
* [extractor/servus] Rewrite extractor by [Ashish0804](https://github.com/Ashish0804), [FrankZ85](https://github.com/FrankZ85), [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
* [extractor/slideslive] Fix slides and chapters/duration by [bashonly](https://github.com/bashonly)
* [extractor/SportDeutschland] Fix extractor by [FriedrichRehren](https://github.com/FriedrichRehren)
* [extractor/Stripchat] Fix extractor by [JChris246](https://github.com/JChris246), [bashonly](https://github.com/bashonly)
* [extractor/tnaflix] Fix extractor by [bashonly](https://github.com/bashonly), [oxamun](https://github.com/oxamun)
* [extractor/tvp] Support `stream.tvp.pl` by [selfisekai](https://github.com/selfisekai)
* [extractor/twitter] Fix `--no-playlist` and add media `view_count` when using GraphQL by [Grub4K](https://github.com/Grub4K)
* [extractor/twitter] Fix graphql extraction on some tweets by [selfisekai](https://github.com/selfisekai)
* [extractor/vimeo] Fix `playerConfig` extraction by [LeoniePhiline](https://github.com/LeoniePhiline), [bashonly](https://github.com/bashonly)
* [extractor/viu] Add `ViuOTTIndonesiaIE` extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/vk] Fix playlists for new API by [the-marenga](https://github.com/the-marenga)
* [extractor/vlive] Replace with `VLiveWebArchiveIE` by [seproDev](https://github.com/seproDev)
* [extractor/ximalaya] Update album `_VALID_URL` by [carusocr](https://github.com/carusocr)
* [extractor/zdf] Use android API endpoint for UHD downloads by [seproDev](https://github.com/seproDev)
* [extractor/drtv] Fix bug in [ab4cbef](https://github.com/yt-dlp/yt-dlp/commit/ab4cbef) by [bashonly](https://github.com/bashonly)
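Since `traverse_obj` received several fixes in this release, here is a minimal sketch of how it is typically used; the `data` dict is made up for illustration:

```python
from yt_dlp.utils import traverse_obj

data = {'media': [{'meta': {'title': 'Example'}, 'duration': '123'}]}

# Nested lookup that returns None instead of raising when a key is missing;
# expected_type discards values of the wrong type
title = traverse_obj(data, ('media', 0, 'meta', 'title'), expected_type=str)  # 'Example'
missing = traverse_obj(data, ('media', 0, 'meta', 'uploader'))                # None

# Multiple paths act as fallbacks: the first one that matches wins
duration = traverse_obj(data, ('media', 0, 'duration_ms'), ('media', 0, 'duration'))  # '123'
```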
[Grub4K](https://github.com/Grub4K) * [cleanup] Misc fixes and cleanup * [extractor/aitube] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) * [extractor/drtv] Add series extractors by [FrederikNS](https://github.com/FrederikNS) * [extractor/volejtv] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) * [extractor/xanimu] Add extractor by [JChris246](https://github.com/JChris246) * [extractor/youtube] Retry manifest refresh for live-from-start by [mzhou](https://github.com/mzhou) * [extractor/biliintl] Add `/media` to `VALID_URL` by [HobbyistDev](https://github.com/HobbyistDev) * [extractor/biliIntl] Add fallback to `video_data` by [HobbyistDev](https://github.com/HobbyistDev) * [extractor/crunchyroll:show] Add `language` to entries by [Chrissi2812](https://github.com/Chrissi2812) * [extractor/joj] Fix extractor by [OndrejBakan](https://github.com/OndrejBakan), [pukkandan](https://github.com/pukkandan) * [extractor/nbc] Update graphql query by [jacobtruman](https://github.com/jacobtruman) * [extractor/reddit] Add subreddit as `channel_id` by [gschizas](https://github.com/gschizas) * [extractor/tiktok] Add `TikTokLive` extractor by [JC-Chung](https://github.com/JC-Chung) ### 2023.01.02 * **Improve plugin architecture** by [Grub4K](https://github.com/Grub4K), [coletdjnz](https://github.com/coletdjnz), [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan) * Plugins can be loaded in any distribution of yt-dlp (binary, pip, source, etc.) and can be distributed and installed as packages. See [the readme](https://github.com/yt-dlp/yt-dlp/tree/05997b6e98e638d97d409c65bb5eb86da68f3b64#plugins) for more information * Add `--compat-options 2021,2022` * This allows devs to change defaults and make other potentially breaking changes more easily. If you need everything to work exactly as-is, put Use `--compat 2022` in your config to guard against future compat changes. 
* [downloader/aria2c] Native progress for aria2c via RPC by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan) * Merge youtube-dl: Upto [commit/195f22f](https://github.com/ytdl-org/youtube-dl/commit/195f22f6) by [Grub4K](https://github.com/Grub4K), [pukkandan](https://github.com/pukkandan) * Add pre-processor stage `video` * Let `--parse/replace-in-metadata` run at any post-processing stage * Add `--enable-file-urls` by [coletdjnz](https://github.com/coletdjnz) * Add new field `aspect_ratio` * Add `ac4` to known codecs * Add `weba` to known extensions * [FFmpegVideoConvertor] Add `gif` to `--recode-video` * Add message when there are no subtitles/thumbnails * Deprioritize HEVC-over-FLV formats by [Lesmiscore](https://github.com/Lesmiscore) * Make early reject of `--match-filter` stricter * Fix `--cookies-from-browser` CLI parsing * Fix `original_url` in playlists * Fix bug in writing playlist info-json * Fix bugs in `PlaylistEntries` * [downloader/ffmpeg] Fix headers for video+audio formats by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly) * [extractor] Add a way to distinguish IEs that returns only videos * [extractor] Implement universal format sorting and deprecate `_sort_formats` * [extractor] Let `_extract_format` functions obey `--ignore-no-formats` * [extractor/generic] Add `fragment_query` extractor arg for DASH and HLS by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) * [extractor/generic] Decode unicode-escaped embed URLs by [bashonly](https://github.com/bashonly) * [extractor/generic] Don't report redirect to https * [extractor/generic] Fix JSON LD manifest extraction by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan) * [extractor/generic] Use `Accept-Encoding: identity` for initial request by [coletdjnz](https://github.com/coletdjnz) * [FormatSort] Add `mov` to `vext` * [jsinterp] Escape regex that looks like nested set * [webvtt] Handle premature EOF by [flashdagger](https://github.com/flashdagger) * [utils] `classproperty`: Add cache support * [utils] `get_exe_version`: Detect broken executables by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan) * [utils] `js_to_json`: Fix bug in [f55523c](https://github.com/yt-dlp/yt-dlp/commit/f55523c) by [ChillingPepper](https://github.com/ChillingPepper), [pukkandan](https://github.com/pukkandan) * [utils] Make `ExtractorError` mutable * [utils] Move `FileDownloader.parse_bytes` into utils * [utils] Move format sorting code into `utils` * [utils] `windows_enable_vt_mode`: Proper implementation by [Grub4K](https://github.com/Grub4K) * [update] Workaround [#5632](https://github.com/yt-dlp/yt-dlp/issues/5632) * [docs] Improvements * [cleanup] Misc fixes and cleanup * [cleanup] Use `random.choices` by [freezboltz](https://github.com/freezboltz) * [extractor/airtv] Add extractor by [HobbyistDev](https://github.com/HobbyistDev) * [extractor/amazonminitv] Add extractors by [GautamMKGarg](https://github.com/GautamMKGarg), [nyuszika7h](https://github.com/nyuszika7h) * [extractor/beatbump] Add extractors by [Bobscorn](https://github.com/Bobscorn), [pukkandan](https://github.com/pukkandan) * [extractor/europarl] Add EuroParlWebstream extractor by [HobbyistDev](https://github.com/HobbyistDev) * [extractor/kanal2] Add extractor by [bashonly](https://github.com/bashonly), [glensc](https://github.com/glensc), [pukkandan](https://github.com/pukkandan) * [extractor/kankanews] Add extractor by 
* [extractor/kick] Add extractor by [bashonly](https://github.com/bashonly)
* [extractor/mediastream] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [elyse0](https://github.com/elyse0)
* [extractor/noice] Add NoicePodcast extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/oneplace] Add OnePlacePodcast extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/rumble] Add RumbleIE extractor by [flashdagger](https://github.com/flashdagger)
* [extractor/screencastify] Add extractor by [bashonly](https://github.com/bashonly)
* [extractor/trtcocuk] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/Veoh] Add user extractor by [tntmod54321](https://github.com/tntmod54321)
* [extractor/videoken] Add extractors by [bashonly](https://github.com/bashonly)
* [extractor/webcamerapl] Add extractor by [milkknife](https://github.com/milkknife)
* [extractor/amazon] Add `AmazonReviews` extractor by [bashonly](https://github.com/bashonly)
* [extractor/netverse] Add `NetverseSearch` extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/vimeo] Add `VimeoProIE` by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
* [extractor/xiami] Remove extractors by [synthpop123](https://github.com/synthpop123)
* [extractor/youtube] Add `piped.video` by [Bnyro](https://github.com/Bnyro)
* [extractor/youtube] Consider language in format de-duplication
* [extractor/youtube] Extract DRC formats
* [extractor/youtube] Fix `ytuser:`
* [extractor/youtube] Fix bug in handling of music URLs
* [extractor/youtube] Subtitles cannot be translated to `und`
* [extractor/youtube:tab] Extract metadata from channel items by [coletdjnz](https://github.com/coletdjnz)
* [extractor/ARD] Add vtt subtitles by [CapacitorSet](https://github.com/CapacitorSet)
* [extractor/ArteTV] Extract chapters by [bashonly](https://github.com/bashonly), [iw0nderhow](https://github.com/iw0nderhow)
* [extractor/bandcamp] Add `album_artist` by [stelcodes](https://github.com/stelcodes)
* [extractor/bilibili] Fix `--no-playlist` for anthology
* [extractor/bilibili] Improve `_VALID_URL` by [skbeh](https://github.com/skbeh)
* [extractor/biliintl:series] Make partial download of series faster
* [extractor/BiliLive] Fix extractor
* [extractor/brightcove] Add `BrightcoveNewBaseIE` and fix embed extraction
* [extractor/cda] Support premium and misc improvements by [selfisekai](https://github.com/selfisekai)
* [extractor/ciscowebex] Support password-protected videos by [damianoamatruda](https://github.com/damianoamatruda)
* [extractor/curiositystream] Fix auth by [mnn](https://github.com/mnn)
* [extractor/embedly] Handle vimeo embeds
* [extractor/fifa] Fix Preplay extraction by [dirkf](https://github.com/dirkf)
* [extractor/foxsports] Fix extractor by [bashonly](https://github.com/bashonly)
* [extractor/gronkh] Fix `_VALID_URL` by [muddi900](https://github.com/muddi900)
* [extractor/hotstar] Improve format metadata
* [extractor/iqiyi] Fix `Iq` JS regex by [bashonly](https://github.com/bashonly)
* [extractor/la7] Improve extractor by [nixxo](https://github.com/nixxo)
* [extractor/mediaset] Better embed detection and error messages by [nixxo](https://github.com/nixxo)
* [extractor/mixch] Support `--wait-for-video`
* [extractor/naver] Improve `_VALID_URL` for `NaverNowIE` by [bashonly](https://github.com/bashonly)
* [extractor/naver] Treat fan subtitles as separate language
* [extractor/netverse] Extract comments by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/nosnl] Add support for /video by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/odnoklassniki] Extract subtitles by [bashonly](https://github.com/bashonly)
* [extractor/pinterest] Fix extractor by [bashonly](https://github.com/bashonly)
* [extractor/plutotv] Fix videos with non-zero start by [digitall](https://github.com/digitall)
* [extractor/polskieradio] Adapt to next.js redesigns by [selfisekai](https://github.com/selfisekai)
* [extractor/reddit] Add vcodec to fallback format by [chengzhicn](https://github.com/chengzhicn)
* [extractor/reddit] Extract crossposted media by [bashonly](https://github.com/bashonly)
* [extractor/reddit] Extract video embeds in text posts by [bashonly](https://github.com/bashonly)
* [extractor/rutube] Support private videos by [mexus](https://github.com/mexus)
* [extractor/sibnet] Separate from VKIE
* [extractor/slideslive] Fix extractor by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
* [extractor/slideslive] Support embeds and slides by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
* [extractor/soundcloud] Support user permalink by [nosoop](https://github.com/nosoop)
* [extractor/spankbang] Fix extractor by [JChris246](https://github.com/JChris246)
* [extractor/stv] Detect DRM
* [extractor/swearnet] Fix description bug
* [extractor/tencent] Fix geo-restricted video by [elyse0](https://github.com/elyse0)
* [extractor/tiktok] Fix subs, `DouyinIE`, improve `_VALID_URL` by [bashonly](https://github.com/bashonly)
* [extractor/tiktok] Update `_VALID_URL`, add `api_hostname` arg by [bashonly](https://github.com/bashonly)
* [extractor/tiktok] Update API hostname by [redraskal](https://github.com/redraskal)
* [extractor/twitcasting] Fix videos with password by [Spicadox](https://github.com/Spicadox), [bashonly](https://github.com/bashonly)
* [extractor/twitter] Heed `--no-playlist` for multi-video tweets by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
* [extractor/twitter] Refresh guest token when expired by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
* [extractor/twitter:spaces] Add `Referer` to m3u8 by [nixxo](https://github.com/nixxo)
* [extractor/udemy] Fix lectures that have no URL and detect DRM
* [extractor/unsupported] Add more URLs
* [extractor/urplay] Support for audio-only formats by [barsnick](https://github.com/barsnick)
* [extractor/wistia] Improve extension detection by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
* [extractor/yle_areena] Support restricted videos by [docbender](https://github.com/docbender)
* [extractor/youku] Fix extractor by [KurtBestor](https://github.com/KurtBestor)
* [extractor/youporn] Fix metadata by [marieell](https://github.com/marieell)
* [extractor/redgifs] Fix bug in [8c188d5](https://github.com/yt-dlp/yt-dlp/commit/8c188d5d09177ed213a05c900d3523867c5897fd)

### 2022.11.11

* Merge youtube-dl: Upto [commit/de39d12](https://github.com/ytdl-org/youtube-dl/commit/de39d128)
* Backport SSL configuration from Python 3.10 by [coletdjnz](https://github.com/coletdjnz)
* Do more processing in `--flat-playlist`
* Fix `--list` options not implying `-s` in some cases by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly)
* Fix end time of clips by [cruel-efficiency](https://github.com/cruel-efficiency)
* Fix for `formats=None`
* Write API params in debug head
* [outtmpl] Ensure ASCII in json and add option for Unicode
* [SponsorBlock] Add `type` field, obey `--retry-sleep extractor`, relax duration check for large segments
* [SponsorBlock] **Support `chapter` category** by [ajayyy](https://github.com/ajayyy), [pukkandan](https://github.com/pukkandan)
* [ThumbnailsConvertor] Fix filename escaping by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
* [ModifyChapters] Handle the entire video being marked for removal
* [embedthumbnail] Fix thumbnail name in mp3 by [How-Bout-No](https://github.com/How-Bout-No)
* [downloader/fragment] HLS download can continue without first fragment
* [cookies] Improve `LenientSimpleCookie` by [Grub4K](https://github.com/Grub4K)
* [jsinterp] Improve separating regex
* [extractor/common] Fix `fatal=False` for `_search_nuxt_data`
* [extractor/common] Improve `_generic_title`
* [extractor/common] Fix `json_ld` type checks by [Grub4K](https://github.com/Grub4K)
* [extractor/generic] Separate embed extraction into own function
* [extractor/generic:quoted-html] Add extractor by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor/unsupported] Raise error on known DRM-only sites by [coletdjnz](https://github.com/coletdjnz)
* [utils] `js_to_json`: Improve escape handling by [Grub4K](https://github.com/Grub4K)
* [utils] `strftime_or_none`: Workaround Python bug on Windows
* [utils] `traverse_obj`: Always return list when branching, allow `re.Match` objects by [Grub4K](https://github.com/Grub4K)
* [build, test] Harden workflows' security by [sashashura](https://github.com/sashashura)
* [build] `py2exe`: Migrate to freeze API by [SG5](https://github.com/SG5), [pukkandan](https://github.com/pukkandan)
* [build] Create `armv7l` and `aarch64` releases by [MrOctopus](https://github.com/MrOctopus), [pukkandan](https://github.com/pukkandan)
* [build] Make linux binary truly standalone using `conda` by [mlampe](https://github.com/mlampe)
* [build] Replace `set-output` with `GITHUB_OUTPUT` by [Lesmiscore](https://github.com/Lesmiscore)
* [update] Use error code `100` for update errors
* [compat] Fix `shutil.move` in restricted ACL mode on BSD by [ClosedPort22](https://github.com/ClosedPort22), [pukkandan](https://github.com/pukkandan)
* [docs, devscripts] Document `pyinst`'s argument passthrough by [jahway603](https://github.com/jahway603)
* [test] Allow `extract_flat` in download tests by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [cleanup] Misc fixes and cleanup by [pukkandan](https://github.com/pukkandan), [Alienmaster](https://github.com/Alienmaster)
* [extractor/aeon] Add extractor by [DoubleCouponDay](https://github.com/DoubleCouponDay)
* [extractor/agora] Add extractors by [selfisekai](https://github.com/selfisekai)
* [extractor/camsoda] Add extractor by [zulaport](https://github.com/zulaport)
* [extractor/cinetecamilano] Add extractor by [timendum](https://github.com/timendum)
* [extractor/deuxm] Add extractors by [CrankDatSouljaBoy](https://github.com/CrankDatSouljaBoy)
* [extractor/genius] Add extractors by [bashonly](https://github.com/bashonly)
* [extractor/japandiet] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/listennotes] Add extractor by [lksj](https://github.com/lksj), [pukkandan](https://github.com/pukkandan)
* [extractor/nos.nl] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/oftv] Add extractors by [DoubleCouponDay](https://github.com/DoubleCouponDay)
* [extractor/podbayfm] Add extractor by [schnusch](https://github.com/schnusch)
* [extractor/qingting] Add extractor by [bashonly](https://github.com/bashonly), [changren-wcr](https://github.com/changren-wcr)
* [extractor/screen9] Add extractor by [tpikonen](https://github.com/tpikonen)
* [extractor/swearnet] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/YleAreena] Add extractor by [pukkandan](https://github.com/pukkandan), [vitkhab](https://github.com/vitkhab)
* [extractor/zeenews] Add extractor by [m4tu4g](https://github.com/m4tu4g), [pukkandan](https://github.com/pukkandan)
* [extractor/youtube:tab] **Update tab handling for redesign** by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
    * Channel URLs download all uploads of the channel as multiple playlists, separated by tab
* [extractor/youtube] Differentiate between no comments and disabled comments by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube] Extract `concurrent_view_count` for livestreams by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube] Fix `duration` for premieres by [nosoop](https://github.com/nosoop)
* [extractor/youtube] Fix `live_status` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor/youtube] Ignore incomplete data error for comment replies by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube] Improve chapter parsing from description
* [extractor/youtube] Mark videos as fully watched by [bsun0000](https://github.com/bsun0000)
* [extractor/youtube] Update piped instances by [Generator](https://github.com/Generator)
* [extractor/youtube] Update playlist metadata extraction for new layout by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube:tab] Fix video metadata from tabs by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube:tab] Let `approximate_date` return timestamp
* [extractor/americastestkitchen] Fix extractor by [bashonly](https://github.com/bashonly)
* [extractor/bbc] Support onion domains by [DoubleCouponDay](https://github.com/DoubleCouponDay)
* [extractor/bilibili] Add chapters and misc cleanup by [lockmatrix](https://github.com/lockmatrix), [pukkandan](https://github.com/pukkandan)
* [extractor/bilibili] Fix BilibiliIE and Bangumi extractors by [lockmatrix](https://github.com/lockmatrix), [pukkandan](https://github.com/pukkandan)
* [extractor/bitchute] Better error for geo-restricted videos by [flashdagger](https://github.com/flashdagger)
* [extractor/bitchute] Improve `BitChuteChannelIE` by [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan)
* [extractor/bitchute] Simplify extractor by [flashdagger](https://github.com/flashdagger), [pukkandan](https://github.com/pukkandan)
* [extractor/cda] Support login through API by [selfisekai](https://github.com/selfisekai)
* [extractor/crunchyroll] Beta is now the only layout by [tejing1](https://github.com/tejing1)
* [extractor/detik] Avoid unnecessary extraction
* [extractor/doodstream] Remove extractor
* [extractor/dplay] Add MotorTrendOnDemand extractor by [bashonly](https://github.com/bashonly)
* [extractor/epoch] Support videos without data-trailer by [gibson042](https://github.com/gibson042), [pukkandan](https://github.com/pukkandan)
* [extractor/fox] Extract thumbnail by [vitkhab](https://github.com/vitkhab)
* [extractor/foxnews] Add `FoxNewsVideo` extractor
* [extractor/hotstar] Add season support by [m4tu4g](https://github.com/m4tu4g)
* [extractor/hotstar] Refactor v1 API calls
* [extractor/iprima] Make json+ld non-fatal by [bashonly](https://github.com/bashonly)
* [extractor/iq] Increase phantomjs timeout
* [extractor/kaltura] Support playlists by [jwoglom](https://github.com/jwoglom), [pukkandan](https://github.com/pukkandan)
* [extractor/lbry] Authenticate with cookies by [flashdagger](https://github.com/flashdagger)
* [extractor/livestreamfails] Support posts by [invertico](https://github.com/invertico)
* [extractor/mlb] Add `MLBArticle` extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/mxplayer] Improve extractor by [m4tu4g](https://github.com/m4tu4g)
* [extractor/niconico] Always use HTTPS for requests
* [extractor/nzherald] Support new video embed by [coletdjnz](https://github.com/coletdjnz)
* [extractor/odnoklassniki] Support boosty.to embeds by [Lesmiscore](https://github.com/Lesmiscore), [megapro17](https://github.com/megapro17), [pukkandan](https://github.com/pukkandan)
* [extractor/paramountplus] Update API token by [bashonly](https://github.com/bashonly)
* [extractor/reddit] Add fallback format by [bashonly](https://github.com/bashonly)
* [extractor/redgifs] Fix extractors by [bashonly](https://github.com/bashonly), [pukkandan](https://github.com/pukkandan)
* [extractor/redgifs] Refresh auth token for 401 by [endotronic](https://github.com/endotronic), [pukkandan](https://github.com/pukkandan)
* [extractor/rumble] Add HLS formats and extract more metadata by [flashdagger](https://github.com/flashdagger)
* [extractor/sbs] Improve `_VALID_URL` by [bashonly](https://github.com/bashonly)
* [extractor/skyit] Fix extractors by [nixxo](https://github.com/nixxo)
* [extractor/stripchat] Fix hostname for HLS stream by [zulaport](https://github.com/zulaport)
* [extractor/stripchat] Improve error message by [freezboltz](https://github.com/freezboltz)
* [extractor/telegram] Add playlist support and more metadata by [bashonly](https://github.com/bashonly), [bsun0000](https://github.com/bsun0000)
* [extractor/Tnaflix] Fix for HTTP 500 by [SG5](https://github.com/SG5), [pukkandan](https://github.com/pukkandan)
* [extractor/tubitv] Better DRM detection by [bashonly](https://github.com/bashonly)
* [extractor/tvp] Update extractors by [selfisekai](https://github.com/selfisekai)
* [extractor/twitcasting] Fix `data-movie-playlist` extraction by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/twitter] Add onion site to `_VALID_URL` by [DoubleCouponDay](https://github.com/DoubleCouponDay)
* [extractor/twitter] Add Spaces extractor and GraphQL API by [Grub4K](https://github.com/Grub4K), [bashonly](https://github.com/bashonly), [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan)
* [extractor/twitter] Support multi-video posts by [Grub4K](https://github.com/Grub4K)
* [extractor/uktvplay] Fix `_VALID_URL`
* [extractor/viu] Support subtitles of on-screen text by [tkgmomosheep](https://github.com/tkgmomosheep)
* [extractor/VK] Fix playlist URLs by [the-marenga](https://github.com/the-marenga)
* [extractor/vlive] Extract `release_timestamp`
* [extractor/voot] Improve `_VALID_URL` by [freezboltz](https://github.com/freezboltz)
* [extractor/wordpress:mb.miniAudioPlayer] Add embed extractor by [coletdjnz](https://github.com/coletdjnz)
* [extractor/YoutubeWebArchive] Improve metadata extraction by [coletdjnz](https://github.com/coletdjnz)
* [extractor/zee5] Improve `_VALID_URL` by [m4tu4g](https://github.com/m4tu4g)
* [extractor/zenyandex] Fix extractors by [lksj](https://github.com/lksj), [puc9](https://github.com/puc9), [pukkandan](https://github.com/pukkandan)

### 2022.10.04

* Allow a `set` to be passed as `download_archive` by [pukkandan](https://github.com/pukkandan), [bashonly](https://github.com/bashonly)
* Allow open ranges for time ranges by [Lesmiscore](https://github.com/Lesmiscore)
* Allow plugin extractors to replace the built-in ones
* Don't download entire video when no matching `--download-sections`
* Fix `--config-location -`
* Improve [5736d79](https://github.com/yt-dlp/yt-dlp/pull/5044/commits/5736d79172c47ff84740d5720467370a560febad)
* Fix for when playlists don't have `webpage_url`
* Support environment variables in `--ffmpeg-location`
* Workaround `libc_ver` not being available on Windows Store version of Python
* [outtmpl] Curly braces to filter keys by [pukkandan](https://github.com/pukkandan)
* [outtmpl] Make `%s` work in strfformat for all systems
* [jsinterp] Workaround operator associativity issue
* [cookies] Let `_get_mac_keyring_password` fail gracefully
* [cookies] Parse cookies leniently by [Grub4K](https://github.com/Grub4K)
* [phantomjs] Fix bug in [587021c](https://github.com/yt-dlp/yt-dlp/commit/587021cd9f717181b44e881941aca3f8d753758b) by [elyse0](https://github.com/elyse0)
* [downloader/aria2c] Fix filename containing leading whitespace by [std-move](https://github.com/std-move)
* [downloader/ism] Support ec-3 codec by [nixxo](https://github.com/nixxo)
* [extractor] Fix `fatal=False` in `RetryManager`
* [extractor] Improve json-ld extraction
* [extractor] Make `_search_json` able to parse lists
* [extractor] Escape `%` in `representation_id` of m3u8
* [extractor/generic] Pass through referer from json-ld
* [utils] `base_url`: URL paths can contain `&` by [elyse0](https://github.com/elyse0)
* [utils] `js_to_json`: Improve
* [utils] `Popen.run`: Fix default return in binary mode
* [utils] `traverse_obj`: Rewrite, document and add tests by [Grub4K](https://github.com/Grub4K)
* [devscripts] `make_lazy_extractors`: Fix for Docker by [josanabr](https://github.com/josanabr)
* [docs] Misc Improvements
* [cleanup] Misc fixes and cleanup by [pukkandan](https://github.com/pukkandan), [gamer191](https://github.com/gamer191)
* [extractor/24tv.ua] Add extractors by [coletdjnz](https://github.com/coletdjnz)
* [extractor/BerufeTV] Add extractor by [Fabi019](https://github.com/Fabi019)
* [extractor/booyah] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [elyse0](https://github.com/elyse0)
* [extractor/bundesliga] Add extractor by [Fabi019](https://github.com/Fabi019)
* [extractor/GoPlay] Add extractor by [CNugteren](https://github.com/CNugteren), [basrieter](https://github.com/basrieter), [jeroenj](https://github.com/jeroenj)
* [extractor/iltalehti] Add extractor by [tpikonen](https://github.com/tpikonen)
* [extractor/IsraelNationalNews] Add extractor by [Bobscorn](https://github.com/Bobscorn)
* [extractor/mediaworksnzvod] Add extractor by [coletdjnz](https://github.com/coletdjnz)
* [extractor/MicrosoftEmbed] Add extractor by [DoubleCouponDay](https://github.com/DoubleCouponDay)
* [extractor/nbc] Add NBCStations extractor by [bashonly](https://github.com/bashonly)
* [extractor/onenewsnz] Add extractor by [coletdjnz](https://github.com/coletdjnz)
* [extractor/prankcast] Add extractor by [HobbyistDev](https://github.com/HobbyistDev), [columndeeply](https://github.com/columndeeply)
* [extractor/Smotrim] Add extractor by [Lesmiscore](https://github.com/Lesmiscore), [nikita-moor](https://github.com/nikita-moor)
* [extractor/tencent] Add Iflix extractor by [elyse0](https://github.com/elyse0)
* [extractor/unscripted] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/adobepass] Add MSO AlticeOne (Optimum TV) by [CplPwnies](https://github.com/CplPwnies)
* [extractor/youtube] **Download `post_live` videos from start** by [Lesmiscore](https://github.com/Lesmiscore), [pukkandan](https://github.com/pukkandan)
* [extractor/youtube] Add support for Shorts audio pivot feed by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor/youtube] Detect `lazy-load-for-videos` embeds
* [extractor/youtube] Do not warn on duplicate chapters
* [extractor/youtube] Fix video like count extraction by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube] Support changing extraction language by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube:tab] Improve continuation items extraction
* [extractor/youtube:tab] Support `reporthistory` page
* [extractor/amazonstore] Fix JSON extraction by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor/amazonstore] Retry to avoid captcha page by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/animeondemand] Remove extractor by [TokyoBlackHole](https://github.com/TokyoBlackHole)
* [extractor/anvato] Fix extractor and refactor by [bashonly](https://github.com/bashonly)
* [extractor/artetv] Remove duplicate stream urls by [Grub4K](https://github.com/Grub4K)
* [extractor/audioboom] Support direct URLs and refactor by [pukkandan](https://github.com/pukkandan), [tpikonen](https://github.com/tpikonen)
* [extractor/bandcamp] Extract `uploader_url`
* [extractor/bilibili] Add space.bilibili extractors by [lockmatrix](https://github.com/lockmatrix)
* [extractor/BilibiliSpace] Fix extractor and better error message by [lockmatrix](https://github.com/lockmatrix)
* [extractor/BiliIntl] Support uppercase lang in `_VALID_URL` by [coletdjnz](https://github.com/coletdjnz)
* [extractor/BiliIntlSeries] Fix `_VALID_URL`
* [extractor/bongacams] Update `_VALID_URL` by [0xGodspeed](https://github.com/0xGodspeed)
* [extractor/crunchyroll:beta] Improve handling of hardsubs by [Grub4K](https://github.com/Grub4K)
* [extractor/detik] Generalize extractors by [HobbyistDev](https://github.com/HobbyistDev), [coletdjnz](https://github.com/coletdjnz)
* [extractor/dplay:italy] Add default authentication by [Timendum](https://github.com/Timendum)
* [extractor/heise] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
* [extractor/holodex] Fix `_VALID_URL` by [LiviaMedeiros](https://github.com/LiviaMedeiros)
* [extractor/hrfensehen] Fix extractor by [snapdgn](https://github.com/snapdgn)
* [extractor/hungama] Add subtitle by [GautamMKGarg](https://github.com/GautamMKGarg), [pukkandan](https://github.com/pukkandan)
* [extractor/instagram] Extract more metadata by [pritam20ps05](https://github.com/pritam20ps05)
* [extractor/JWPlatform] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
* [extractor/malltv] Fix video_id extraction by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/MLBTV] Detect live streams
* [extractor/motorsport] Support native embeds
* [extractor/Mxplayer] Fix extractor by [itachi-19](https://github.com/itachi-19)
* [extractor/nebula] Add nebula.tv by [tannertechnology](https://github.com/tannertechnology)
* [extractor/nfl] Fix extractor by [bashonly](https://github.com/bashonly)
* [extractor/ondemandkorea] Update `jw_config` regex by [julien-hadleyjack](https://github.com/julien-hadleyjack)
* [extractor/paramountplus] Better DRM detection by [bashonly](https://github.com/bashonly)
* [extractor/patreon] Sort formats
* [extractor/rcs] Fix embed extraction by [coletdjnz](https://github.com/coletdjnz)
* [extractor/redgifs] Fix extractor by [jhwgh1968](https://github.com/jhwgh1968)
* [extractor/rutube] Fix `_EMBED_REGEX` by [coletdjnz](https://github.com/coletdjnz)
* [extractor/RUTV] Fix warnings for livestreams by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/soundcloud:search] More metadata in `--flat-playlist` by [SuperSonicHub1](https://github.com/SuperSonicHub1)
* [extractor/telegraaf] Use mobile GraphQL API endpoint by [coletdjnz](https://github.com/coletdjnz)
* [extractor/tennistv] Fix timestamp by [zenerdi0de](https://github.com/zenerdi0de)
* [extractor/tiktok] Fix TikTokIE by [bashonly](https://github.com/bashonly)
* [extractor/triller] Fix auth token by [bashonly](https://github.com/bashonly)
* [extractor/trovo] Fix extractors by [Mehavoid](https://github.com/Mehavoid)
* [extractor/tv2] Support new url format by [tobi1805](https://github.com/tobi1805)
* [extractor/web.archive:youtube] Fix `_YT_INITIAL_PLAYER_RESPONSE_RE`
* [extractor/wistia] Add support for channels by [coletdjnz](https://github.com/coletdjnz)
* [extractor/wistia] Match IDs in embed URLs by [bashonly](https://github.com/bashonly)
* [extractor/wordpress:playlist] Add generic embed extractor by [coletdjnz](https://github.com/coletdjnz)
* [extractor/yandexvideopreview] Update `_VALID_URL` by [Grub4K](https://github.com/Grub4K)
* [extractor/zee5] Fix `_VALID_URL` by [m4tu4g](https://github.com/m4tu4g)
* [extractor/zee5] Generate device ids by [freezboltz](https://github.com/freezboltz)

### 2022.09.01

* Add option `--use-extractors`
* Merge youtube-dl: Upto [commit/ed5c44e](https://github.com/ytdl-org/youtube-dl/commit/ed5c44e7)
* Add yt-dlp version to infojson
* Fix `--break-per-url --max-downloads`
* Fix bug in `--alias`
* [cookies] Support firefox container in `--cookies-from-browser` by [bashonly](https://github.com/bashonly), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [downloader/external] Smarter detection of executable
* [extractor/generic] Don't return JW player without formats
* [FormatSort] Fix `aext` for `--prefer-free-formats`
* [jsinterp] Various improvements by [pukkandan](https://github.com/pukkandan), [dirkf](https://github.com/dirkf), [elyse0](https://github.com/elyse0)
* [cache] Mechanism to invalidate old cache
* [utils] Add `deprecation_warning`
* [utils] Add `orderedSet_from_options`
* [utils] `Popen`: Restore `LD_LIBRARY_PATH` when using PyInstaller by [Lesmiscore](https://github.com/Lesmiscore)
* [build] `make tar` should not follow `DESTDIR` by [satan1st](https://github.com/satan1st)
* [build] Update pyinstaller by [shirt-dev](https://github.com/shirt-dev)
* [test] Fix `test_youtube_signature`
* [cleanup] Misc fixes and cleanup by [DavidH-2022](https://github.com/DavidH-2022), [MrRawes](https://github.com/MrRawes), [pukkandan](https://github.com/pukkandan)
* [extractor/epoch] Add extractor by [tejasa97](https://github.com/tejasa97)
* [extractor/eurosport] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/IslamChannel] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/newspicks] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/triller] Add extractor by [bashonly](https://github.com/bashonly)
* [extractor/VQQ] Add extractors by [elyse0](https://github.com/elyse0)
* [extractor/youtube] Improvements to nsig extraction
* [extractor/youtube] Fix bug in format sorting
* [extractor/youtube] Update iOS Innertube clients by [SamantazFox](https://github.com/SamantazFox)
* [extractor/youtube] Use device-specific user agent by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube] Add `--compat-option no-youtube-prefer-utc-upload-date` by [coletdjnz](https://github.com/coletdjnz)
* [extractor/arte] Bug fix by [cgrigis](https://github.com/cgrigis)
* [extractor/bilibili] Extract `flac` with premium account by [jackyyf](https://github.com/jackyyf)
* [extractor/BiliBiliSearch] Don't sort by date
* [extractor/BiliBiliSearch] Fix infinite loop
* [extractor/bitchute] Mark errors as expected
* [extractor/crunchyroll:beta] Use anonymous access by [tejing1](https://github.com/tejing1)
* [extractor/huya] Fix stream extraction by [ohaiibuzzle](https://github.com/ohaiibuzzle)
* [extractor/medaltv] Fix extraction by [xenova](https://github.com/xenova)
* [extractor/mediaset] Fix embed extraction
* [extractor/mixcloud] All formats are audio-only
* [extractor/rtbf] Fix jwt extraction by [elyse0](https://github.com/elyse0)
* [extractor/screencastomatic] Support `--video-password` by [shreyasminocha](https://github.com/shreyasminocha)
* [extractor/stripchat] Don't modify input URL by [dfaker](https://github.com/dfaker)
* [extractor/uktv] Improve `_VALID_URL` by [dirkf](https://github.com/dirkf)
* [extractor/vimeo:user] Fix `_VALID_URL`

### 2022.08.19

* Fix bug in `--download-archive`
* [jsinterp] **Fix for new youtube players** and related improvements by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
* [phantomjs] Add function to execute JS without a DOM by [MinePlayersPE](https://github.com/MinePlayersPE), [pukkandan](https://github.com/pukkandan)
* [build] Exclude devscripts from installs by [Lesmiscore](https://github.com/Lesmiscore)
* [cleanup] Misc fixes and cleanup
* [extractor/youtube] **Add fallback to phantomjs** for nsig
* [extractor/youtube] Fix error reporting of "Incomplete data"
* [extractor/youtube] Improve format sorting for iOS formats
* [extractor/youtube] Improve signature caching
* [extractor/instagram] Fix extraction by [bashonly](https://github.com/bashonly), [pritam20ps05](https://github.com/pritam20ps05)
* [extractor/rai] Minor fix by [nixxo](https://github.com/nixxo)
* [extractor/rtbf] Fix stream extractor by [elyse0](https://github.com/elyse0)
* [extractor/SovietsCloset] Fix extractor by [ChillingPepper](https://github.com/ChillingPepper)
* [extractor/zattoo] Fix Zattoo resellers by [goggle](https://github.com/goggle)

### 2022.08.14

* Merge youtube-dl: Upto [commit/d231b56](https://github.com/ytdl-org/youtube-dl/commit/d231b56)
* [jsinterp] Handle **new youtube signature functions**
* [jsinterp] Truncate error messages
* [extractor] Fix format sorting of `channels`
* [ffmpeg] Disable avconv unless `--prefer-avconv`
* [ffmpeg] Smarter detection of ffprobe filename
* [embedthumbnail] Detect `libatomicparsley.so`
* [ThumbnailsConvertor] Fix conversion after `fixup_webp`
* [utils] Fix `get_compatible_ext`
* [build] Fix changelog
* [update] Set executable bit-mask by [pukkandan](https://github.com/pukkandan), [Lesmiscore](https://github.com/Lesmiscore)
* [devscripts] Fix import
* [docs] Consistent use of `e.g.` by [Lesmiscore](https://github.com/Lesmiscore)
* [cleanup] Misc fixes and cleanup
* [extractor/moview] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/parler] Add extractor by [palewire](https://github.com/palewire)
* [extractor/patreon] Ignore erroneous media attachments by [coletdjnz](https://github.com/coletdjnz)
* [extractor/truth] Add extractor by [palewire](https://github.com/palewire)
* [extractor/aenetworks] Add formats parameter by [jacobtruman](https://github.com/jacobtruman)
* [extractor/crunchyroll] Improve `_VALID_URL`s
* [extractor/doodstream] Add `wf` domain by [aldoridhoni](https://github.com/aldoridhoni)
* [extractor/facebook] Add reel support by [bashonly](https://github.com/bashonly)
* [extractor/MLB] New extractor by [ischmidt20](https://github.com/ischmidt20)
* [extractor/rai] Misc fixes by [nixxo](https://github.com/nixxo)
* [extractor/toggo] Improve `_VALID_URL` by [masta79](https://github.com/masta79)
* [extractor/tubitv] Extract additional formats by [shirt-dev](https://github.com/shirt-dev)
* [extractor/zattoo] Potential fix for resellers

### 2022.08.08

* **Remove Python 3.6 support**
* Determine merge container better by [pukkandan](https://github.com/pukkandan), [selfisekai](https://github.com/selfisekai)
* Framework for embed detection by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* Merge youtube-dl: Upto [commit/adb5294](https://github.com/ytdl-org/youtube-dl/commit/adb5294)
* `--compat-option no-live-chat` should disable danmaku
* Fix misleading DRM message
* Import ctypes only when necessary
* Minor bugfixes
* Reject entire playlists faster with `--match-filter`
* Remove filtered entries from `-J`
* Standardize retry mechanism
* Validate `--merge-output-format`
* [downloader] Add average speed to final progress line
* [extractor] Add field `audio_channels`
* [extractor] Support multiple archive ids for one video
* [ffmpeg] Set `ffmpeg_location` in a contextvar
* [FFmpegThumbnailsConvertor] Fix conversion from GIF
* [MetadataParser] Don't set `None` when the field didn't match
* [outtmpl] Smarter replacing of unsupported characters
* [outtmpl] Treat empty values as None in filenames
* [utils] `sanitize_open`: Allow any IO stream as stdout
* [build, devscripts] Add devscript to set a build variant
* [build] Improve build process by [shirt-dev](https://github.com/shirt-dev)
* [build] Update pyinstaller
* [devscripts] Create `utils` and refactor
* [docs] Clarify `best*`
* [docs] Fix bug report issue template
* [docs] Fix capitalization in references by [christoph-heinrich](https://github.com/christoph-heinrich)
* [cleanup, mhtml] Use imghdr
* [cleanup, utils] Consolidate known media extensions
* [cleanup] Misc fixes and cleanup
* [extractor/angel] Add extractor by [AxiosDeminence](https://github.com/AxiosDeminence)
* [extractor/dplay] Add MotorTrend extractor by [Sipherdrakon](https://github.com/Sipherdrakon)
* [extractor/harpodeon] Add extractor by [eren-kemer](https://github.com/eren-kemer)
* [extractor/holodex] Add extractor by [pukkandan](https://github.com/pukkandan), [sqrtNOT](https://github.com/sqrtNOT)
* [extractor/kompas] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/rai] Add raisudtirol extractor by [nixxo](https://github.com/nixxo)
* [extractor/tempo] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/youtube] **Fixes for third party client detection** by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube] Add `live_status=post_live` by [lazypete365](https://github.com/lazypete365)
* [extractor/youtube] Extract more format info
* [extractor/youtube] Parse translated subtitles only when requested
* [extractor/youtube, extractor/twitch] Allow waiting for channels to become live
* [extractor/youtube, webvtt] Extract auto-subs from livestream VODs by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
* [extractor/AbemaTVTitle] Implement paging by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/archiveorg] Improve handling of formats by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor/arte] Fix title extraction
* [extractor/arte] **Move to v2 API** by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
* [extractor/bbc] Fix news articles by [ajj8](https://github.com/ajj8)
* [extractor/camtasia] Separate into own extractor by [coletdjnz](https://github.com/coletdjnz)
* [extractor/cloudflarestream] Fix video_id padding by [haobinliang](https://github.com/haobinliang)
* [extractor/crunchyroll] Fix conversion of thumbnail from GIF
* [extractor/crunchyroll] Handle missing metadata correctly by [Burve](https://github.com/Burve), [pukkandan](https://github.com/pukkandan)
* [extractor/crunchyroll:beta] Extract timestamp and fix tests by [tejing1](https://github.com/tejing1)
* [extractor/crunchyroll:beta] Use streams API by [tejing1](https://github.com/tejing1)
* [extractor/doodstream] Support more domains by [Galiley](https://github.com/Galiley)
* [extractor/ESPN] Extract duration by [ischmidt20](https://github.com/ischmidt20)
* [extractor/FIFA] Change API endpoint by [Bricio](https://github.com/Bricio), [yashkc2025](https://github.com/yashkc2025)
* [extractor/globo:article] Remove false positives by [Bricio](https://github.com/Bricio)
* [extractor/Go] Extract timestamp by [ischmidt20](https://github.com/ischmidt20)
* [extractor/hidive] Fix cookie login when netrc is also given by [winterbird-code](https://github.com/winterbird-code)
* [extractor/html5] Separate into own extractor by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor/ina] Improve extractor by [elyse0](https://github.com/elyse0)
* [extractor/NaverNow] Change endpoint by [ping](https://github.com/ping)
* [extractor/ninegag] Extract uploader by [DjesonPV](https://github.com/DjesonPV)
* [extractor/NovaPlay] Fix extractor by [Bojidarist](https://github.com/Bojidarist)
* [extractor/orf:radio] Rewrite extractors
* [extractor/patreon] Fix and improve extractors by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor/rai] Fix RaiNews extraction by [nixxo](https://github.com/nixxo)
* [extractor/redbee] Unify and update extractors by [elyse0](https://github.com/elyse0)
* [extractor/stripchat] Fix `_VALID_URL` by [freezboltz](https://github.com/freezboltz)
* [extractor/tubi] Exclude playlists from playlist entries by [sqrtNOT](https://github.com/sqrtNOT)
* [extractor/tviplayer] Improve `_VALID_URL` by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/twitch] Extract chapters for single chapter VODs by [mpeter50](https://github.com/mpeter50)
* [extractor/vgtv] Support tv.vg.no by [sqrtNOT](https://github.com/sqrtNOT)
* [extractor/vidio] Support embed link by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/vk] Fix extractor by [Mehavoid](https://github.com/Mehavoid)
* [extractor/WASDTV:record] Fix `_VALID_URL`
* [extractor/xfileshare] Add Referer by [Galiley](https://github.com/Galiley)
* [extractor/YahooJapanNews] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/yandexmusic] Extract higher quality format
* [extractor/zee5] Update Device ID by [m4tu4g](https://github.com/m4tu4g)

### 2022.07.18

* Allow users to specify encoding in each config file by [Lesmiscore](https://github.com/Lesmiscore)
* Discard infodict from memory if no longer needed
* Do not allow extractors to return `None`
* Do not load system certificates when `certifi` is used
* Fix rounding of integers in format table
* Improve chapter sanitization
* Skip some fixup if remux/recode is needed by [Lesmiscore](https://github.com/Lesmiscore)
* Support `--no-progress` for `--wait-for-video`
* Fix bug in [612f2be](https://github.com/yt-dlp/yt-dlp/commit/612f2be5d3924540158dfbe5f25d841f04cff8c6)
* [outtmpl] Add alternate form `h` for HTML escaping
* [aes] Add multiple padding modes in CBC by [elyse0](https://github.com/elyse0)
* [extractor/common] Passthrough `errnote=False` to parsers
* [extractor/generic] Remove HEAD request
* [http] Ensure the file handle is always closed
* [ModifyChapters] Modify duration in infodict
* [options] Fix aliases to `--config-location`
* [utils] Fix `get_domain`
* [build] Consistent order for lazy extractors by [lamby](https://github.com/lamby)
* [build] Fix architecture suffix of executables by [odo2063](https://github.com/odo2063)
* [build] Improve `setup.py`
* [update] Do not check `_update_spec` when up to date
* [update] Prepare to remove Python 3.6 support
* [compat] Let PyInstaller detect _legacy module
* [devscripts/update-formulae] Do not change dependency section
* [test] Split download tests so they can be more easily run in CI
* [docs] Improve docstring of `download_ranges` by [FirefoxMetzger](https://github.com/FirefoxMetzger)
* [docs] Improve issue templates
* [build] Fix bug in [6d916fe](https://github.com/yt-dlp/yt-dlp/commit/6d916fe709a38e8c4c69b73843acf170b5165931)
* [cleanup, utils] Refactor parse_codecs
* [cleanup] Misc fixes and cleanup
* [extractor/acfun] Add extractors by [lockmatrix](https://github.com/lockmatrix)
* [extractor/Audiodraft] Add extractors by [Ashish0804](https://github.com/Ashish0804), [fstirlitz](https://github.com/fstirlitz)
* [extractor/cellebrite] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/detik] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/hytale] Add extractor by [llamasblade](https://github.com/llamasblade), [pukkandan](https://github.com/pukkandan)
* [extractor/liputan6] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/mocha] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/rtl.lu] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/rtvsl] Add extractor by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
* [extractor/StarTrek] Add extractor by [scy](https://github.com/scy)
* [extractor/syvdk] Add extractor by [misaelaguayo](https://github.com/misaelaguayo)
* [extractor/theholetv] Add extractor by [dosy4ev](https://github.com/dosy4ev)
* [extractor/TubeTuGraz] Add extractor by [Ferdi265](https://github.com/Ferdi265), [pukkandan](https://github.com/pukkandan)
* [extractor/tviplayer] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/wetv] Add extractors by [elyse0](https://github.com/elyse0)
* [extractor/wikimedia] Add extractor by [EhtishamSabir](https://github.com/EhtishamSabir), [pukkandan](https://github.com/pukkandan)
* [extractor/youtube] Fix duration check for post-live manifestless mode
* [extractor/youtube] More metadata for storyboards by [ftk](https://github.com/ftk)
* [extractor/bigo] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/BiliIntl] Fix subtitle extraction by [MinePlayersPE](https://github.com/MinePlayersPE)
* [extractor/crunchyroll] Improve `_VALID_URL`
* [extractor/fifa] Fix extractor by [ischmidt20](https://github.com/ischmidt20)
* [extractor/instagram] Fix post/story extractors by [pritam20ps05](https://github.com/pritam20ps05), [pukkandan](https://github.com/pukkandan)
* [extractor/iq] Set language correctly for Korean subtitles
* [extractor/MangoTV] Fix subtitle languages
* [extractor/Netverse] Improve playlist extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/philharmoniedeparis] Fix extractor by [sqrtNOT](https://github.com/sqrtNOT)
* [extractor/Trovo] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [extractor/twitch] Support storyboards for VODs by [ftk](https://github.com/ftk)
* [extractor/WatchESPN] Improve `_VALID_URL` by [IONECarter](https://github.com/IONECarter), [dirkf](https://github.com/dirkf)
* [extractor/WSJArticle] Fix video id extraction by [sqrtNOT](https://github.com/sqrtNOT)
* [extractor/Ximalaya] Fix extractors by [lockmatrix](https://github.com/lockmatrix)
* [cleanup, extractor/youtube] Fix tests by [sheerluck](https://github.com/sheerluck)

### 2022.06.29

* Fix `--downloader native`
* Fix `section_end` of clips
* Fix playlist error handling
* Sanitize `chapters`
* [extractor] Fix `_create_request` when headers is None
* [extractor] Fix empty `BaseURL` in MPD
* [ffmpeg] Write full output to debug on error
* [hls] Warn user when trying to download live HLS
* [options] Fix `parse_known_args` for `--`
* [utils] Fix inconsistent default handling between HTTP and HTTPS requests by [coletdjnz](https://github.com/coletdjnz)
* [build] Draft release until complete
* [build] Fix release tag commit
* [build] Standalone x64 builds for MacOS 10.9 by [StefanLobbenmeier](https://github.com/StefanLobbenmeier)
* [update] Ability to set a maximum version for specific variants
* [compat] Fix `compat.WINDOWS_VT_MODE`
* [compat] Remove deprecated functions from core code
* [compat] Remove more functions
* [cleanup, extractor] Reduce direct use of `_downloader`
* [cleanup] Consistent style for file heads
* [cleanup] Fix some typos by [crazymoose77756](https://github.com/crazymoose77756)
* [cleanup] Misc fixes and cleanup
* [extractor/Scrolller] Add extractor by [LunarFang416](https://github.com/LunarFang416)
* [extractor/ViMP] Add playlist extractor by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
* [extractor/fuyin] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/livestreamfails] Add extractor by [nomevi](https://github.com/nomevi)
* [extractor/premiershiprugby] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/steam] Add broadcast extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/youtube] Mark videos as fully watched by [Brett824](https://github.com/Brett824)
* [extractor/CWTV] Extract thumbnail by [ischmidt20](https://github.com/ischmidt20)
* [extractor/ViMP] Add thumbnail and support more sites by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
* [extractor/dropout] Support cookies and login only as needed by [pingiun](https://github.com/pingiun), [pukkandan](https://github.com/pukkandan)
* [extractor/ertflix] Improve `_VALID_URL`
* [extractor/lbry] Use HEAD request for redirect URL by [flashdagger](https://github.com/flashdagger)
* [extractor/mediaset] Improve `_VALID_URL`
* [extractor/npr] Implement [e50c350](https://github.com/yt-dlp/yt-dlp/commit/e50c3500b43d80e4492569c4b4523c4379c6fbb2) differently
* [extractor/tennistv] Rewrite extractor by [pukkandan](https://github.com/pukkandan), [zenerdi0de](https://github.com/zenerdi0de)

### 2022.06.22.1

* [build] Fix updating homebrew formula

### 2022.06.22

* [**Deprecate support for Python 3.6**](https://github.com/yt-dlp/yt-dlp/issues/3764#issuecomment-1154051119)
* **Add option `--download-sections` to download video partially**
    * Chapter regex and time ranges are accepted, e.g. `--download-sections *1:10-2:20`
* Add option `--alias`
* Add option `--lazy-playlist` to process entries as they are received
* Add option `--retry-sleep`
* Add slicing notation to `--playlist-items`
    * Adds support for negative indices and step
    * Add `-I` as alias for `--playlist-index`
    * Makes `--playlist-start`, `--playlist-end`, `--playlist-reverse`, `--no-playlist-reverse` redundant
* `--config-location -` to provide options interactively
* [build] Add Linux standalone builds
* [update] Self-restart after update
* Merge youtube-dl: Upto [commit/8a158a9](https://github.com/ytdl-org/youtube-dl/commit/8a158a9)
* Add `--no-update`
* Allow extractors to specify section_start/end for clips
* Do not print progress to `stderr` with `-q`
* Ensure pre-processor errors do not block video download
* Fix `--simulate --max-downloads`
* Improve error handling of bad config files
* Return an error code if update fails
* Fix bug in [3a408f9](https://github.com/yt-dlp/yt-dlp/commit/3a408f9d199127ca2626359e21a866a09ab236b3)
* [ExtractAudio] Allow conditional conversion
* [ModifyChapters] Fix repeated removal of small segments
* [ThumbnailsConvertor] Allow conditional conversion
* [cookies] Detect profiles for cygwin/BSD by [moench-tegeder](https://github.com/moench-tegeder)
* [dash] Show fragment count with `--live-from-start` by [flashdagger](https://github.com/flashdagger)
* [extractor] Add `_search_json` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor] Add `default` parameter to `_search_json` by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor] Add dev option `--load-pages`
* [extractor] Handle `json_ld` with multiple `@type`s
* [extractor] Import `_ALL_CLASSES` lazily
* [extractor] Recognize `src` attribute from HTML5 media elements by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/generic] Revert e6ae51c123897927eb3c9899923d8ffd31c7f85d
* [f4m] Bugfix
* [ffmpeg] Check version lazily
* [jsinterp] Some optimizations and refactoring by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
* [utils] Improve performance using `functools.cache`
* [utils] Send HTTP/1.1 ALPN extension by [coletdjnz](https://github.com/coletdjnz)
* [utils] `ExtractorError`: Fix `exc_info`
* [utils] `ISO3166Utils`: Add `EU` and `AP`
* [utils] `Popen`: Refactor to use contextmanager
* [utils] `locked_file`: Fix for PyPy on Windows
* [update] Expose more functionality to API
* [update] Use `.git` folder to distinguish `source`/`unknown`
* [compat] Add `functools.cached_property`
* [test] Fix `FakeYDL` signatures by [coletdjnz](https://github.com/coletdjnz)
* [docs] Improvements
* [cleanup, ExtractAudio] Refactor
* [cleanup, downloader] Refactor `report_progress`
* [cleanup, extractor] Refactor `_download_...` methods
* [cleanup, extractor] Rename `extractors.py` to `_extractors.py`
* [cleanup, utils] Don't use kwargs for `format_field`
* [cleanup, build] Refactor
* [cleanup, docs] Re-indent "Usage and Options" section
* [cleanup] Deprecate `YoutubeDL.parse_outtmpl`
* [cleanup] Misc fixes and cleanup by [Lesmiscore](https://github.com/Lesmiscore), [MrRawes](https://github.com/MrRawes), [christoph-heinrich](https://github.com/christoph-heinrich), [flashdagger](https://github.com/flashdagger), [gamer191](https://github.com/gamer191), [kwconder](https://github.com/kwconder), [pukkandan](https://github.com/pukkandan)
* [extractor/DailyWire] Add extractors by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan)
* [extractor/fourzerostudio] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/GoogleDrive] Add folder extractor by [evansp](https://github.com/evansp), [pukkandan](https://github.com/pukkandan)
* [extractor/MirrorCoUK] Add extractor by [LunarFang416](https://github.com/LunarFang416), [pukkandan](https://github.com/pukkandan)
* [extractor/atscaleconfevent] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [extractor/freetv] Add extractor by [elyse0](https://github.com/elyse0)
* [extractor/ixigua] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/kicker.de] Add extractor by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/netverse] Add extractors by [HobbyistDev](https://github.com/HobbyistDev), [pukkandan](https://github.com/pukkandan)
* [extractor/playsuisse] Add extractor by [pukkandan](https://github.com/pukkandan), [sbor23](https://github.com/sbor23)
* [extractor/substack] Add extractor by [elyse0](https://github.com/elyse0)
* [extractor/youtube] **Support downloading clips**
* [extractor/youtube] Add `innertube_host` and `innertube_key` extractor args by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube] Add warning for PostLiveDvr
* [extractor/youtube] Bring back `_extract_chapters_from_description`
* [extractor/youtube] Extract `comment_count` from webpage
* [extractor/youtube] Fix `:ytnotifications` extractor by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube] Fix initial player response extraction by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [extractor/youtube] Fix live chat for videos with content warning by [coletdjnz](https://github.com/coletdjnz)
* [extractor/youtube] Make signature extraction non-fatal
* [extractor/youtube:tab] Detect `videoRenderer` in `_post_thread_continuation_entries`
* [extractor/BiliIntl] Fix metadata extraction
* [extractor/BiliIntl] Fix subtitle extraction by [HobbyistDev](https://github.com/HobbyistDev)
* [extractor/FranceCulture] Fix extractor by [aurelg](https://github.com/aurelg), [pukkandan](https://github.com/pukkandan)
* [extractor/PokemonSoundLibrary] Remove extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/StreamCZ] Fix extractor by [adamanldo](https://github.com/adamanldo), [dirkf](https://github.com/dirkf)
* [extractor/WatchESPN] Support free videos and BAM_DTC by [ischmidt20](https://github.com/ischmidt20)
* [extractor/animelab] Remove extractor by [gamer191](https://github.com/gamer191)
* [extractor/bloomberg] Change playback endpoint by [m4tu4g](https://github.com/m4tu4g)
* [extractor/ccc] Extract view_count by [vkorablin](https://github.com/vkorablin)
* [extractor/crunchyroll:beta] Fix extractor after API change by [Burve](https://github.com/Burve), [tejing1](https://github.com/tejing1)
* [extractor/curiositystream] Get `auth_token` from cookie by [mnn](https://github.com/mnn)
* [extractor/digitalconcerthall] Fix extractor by [ZhymabekRoman](https://github.com/ZhymabekRoman)
* [extractor/dropbox] Extract the correct `mountComponent`
* [extractor/dropout] Login is not mandatory
* [extractor/duboku] Fix for hostname change by [mozbugbox](https://github.com/mozbugbox)
* [extractor/espn] Add `WatchESPN` extractor by [ischmidt20](https://github.com/ischmidt20), [pukkandan](https://github.com/pukkandan)
* [extractor/expressen] Fix extractor by [aejdl](https://github.com/aejdl)
* [extractor/foxnews] Update embed extraction by [elyse0](https://github.com/elyse0)
* [extractor/ina] Fix extractor by [elyse0](https://github.com/elyse0)
* [extractor/iwara:user] Make paging better by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/jwplatform] Look for `data-video-jw-id`
* [extractor/lbry] Update livestream API by [flashdagger](https://github.com/flashdagger)
* [extractor/mediaset] Improve `_VALID_URL`
* [extractor/naver] Add `navernow` extractor by [ping](https://github.com/ping)
* [extractor/niconico:series] Fix extractor by [sqrtNOT](https://github.com/sqrtNOT)
* [extractor/npr] Use stream url from json-ld by [r5d](https://github.com/r5d)
* [extractor/pornhub] Extract `uploader_id` field by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/radiofrance] Add more radios by [bubbleguuum](https://github.com/bubbleguuum)
* [extractor/rumble] Detect JS embed
* [extractor/rumble] Extract subtitles by [fstirlitz](https://github.com/fstirlitz)
* [extractor/southpark] Add `southpark.lat` extractor by [darkxex](https://github.com/darkxex)
* [extractor/spotify:show] Fix extractor
* [extractor/tiktok] Detect embeds
* [extractor/tiktok] Extract `SIGI_STATE` by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan), [sulyi](https://github.com/sulyi)
* [extractor/tver] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/vevo] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [extractor/yahoo:gyao] Fix extractor
* [extractor/zattoo] Fix live streams by [miseran](https://github.com/miseran)
* [extractor/zdf] Improve format sorting by [elyse0](https://github.com/elyse0)

### 2022.05.18

* Add support for SSL client certificate authentication by [coletdjnz](https://github.com/coletdjnz), [dirkf](https://github.com/dirkf)
    * Adds `--client-certificate`, `--client-certificate-key`, `--client-certificate-password`
* Add `--match-filter -` to interactively ask for each video
* `--max-downloads` should obey `--break-per-input`
* Allow use of weaker ciphers with `--legacy-server-connect`
* Don't imply `-s` for later stages of `-O`
* Fix `--date today`
* Fix `--skip-unavailable-fragments`
* Fix color in `-q -F`
* Fix redirect HTTP method handling by [coletdjnz](https://github.com/coletdjnz)
* Improve `--clean-infojson`
* Remove warning for videos with an empty title
* Run `FFmpegFixupM3u8PP` for live-streams if needed
* Show name of downloader in verbose log
* [cookies] Allow `cookiefile` to be a text stream
* [cookies] Report progress when importing cookies
* [downloader/ffmpeg] Specify headers for each URL by [elyse0](https://github.com/elyse0)
* [fragment] Do not change chunk-size when `--test`
* [fragment] Make single thread download work for `--live-from-start` by [Lesmiscore](https://github.com/Lesmiscore)
* [hls] Fix `byte_range` for `EXT-X-MAP` fragment by [fstirlitz](https://github.com/fstirlitz)
* [http] Fix retrying on read timeout by [coletdjnz](https://github.com/coletdjnz)
* [ffmpeg] Fix features detection
* [EmbedSubtitle] Enable for more video extensions
* [EmbedThumbnail] Disable thumbnail conversion for mkv by [evansp](https://github.com/evansp)
* [EmbedThumbnail] Do not obey `-k`
* [EmbedThumbnail] Do not remove id3v1 tags
* [FFmpegMetadata] Remove `\0` from metadata
* [FFmpegMetadata] Remove filename from attached info-json
* [FixupM3u8] Obey `--hls-prefer-mpegts`
* [Sponsorblock] Don't crash when duration is unknown
* [XAttrMetadata] Refactor and document dependencies
* [extractor] Document netrc machines
* [extractor] Update `manifest_url`s after redirect by [elyse0](https://github.com/elyse0)
* [extractor] Update dash `manifest_url` after redirects by [elyse0](https://github.com/elyse0)
* [extractor] Use `classmethod`/`property` where possible
* [generic] Refactor `_extract_rss`
* [utils] `is_html`: Handle double BOM
* [utils] `locked_file`: Ignore illegal seek on `truncate` by [jakeogh](https://github.com/jakeogh)
* [utils] `sanitize_path`: Fix when path is empty string
* [utils] `write_string`: Workaround newline issue in `conhost`
* [utils] `certifi`: Make sure the pem file exists
* [utils] Fix `WebSocketsWrapper`
* [utils] `locked_file`: Do not give executable bits for newly created files by [Lesmiscore](https://github.com/Lesmiscore)
* [utils] `YoutubeDLCookieJar`: Detect and reject JSON file by [Lesmiscore](https://github.com/Lesmiscore)
* [test] Convert warnings into errors and fix some existing warnings by [fstirlitz](https://github.com/fstirlitz)
* [dependencies] Create module with all dependency imports
* [compat] Split into sub-modules by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
* [compat] Implement `compat.imghdr`
* [build] Add `make uninstall` by [MrRawes](https://github.com/MrRawes)
* [build] Avoid use of `install -D`
* [build] Fix `Makefile` by [putnam](https://github.com/putnam)
* [build] Fix `--onedir` on macOS
* [build] Add more test-runners
* [cleanup] Deprecate some compat vars by [fstirlitz](https://github.com/fstirlitz), [pukkandan](https://github.com/pukkandan)
* [cleanup] Remove unused code paths, extractors, scripts and tests by [fstirlitz](https://github.com/fstirlitz)
* [cleanup] Upgrade syntax (`pyupgrade`) and sort imports (`isort`)
* [cleanup, docs, build] Misc fixes
* [BilibiliLive] Add extractor by [HE7086](https://github.com/HE7086), [pukkandan](https://github.com/pukkandan)
* [Fifa] Add extractor by [Bricio](https://github.com/Bricio)
* [goodgame] Add extractor by [nevack](https://github.com/nevack)
* [gronkh] Add playlist extractors by [hatienl0i261299](https://github.com/hatienl0i261299)
* [icareus] Add extractor by [tpikonen](https://github.com/tpikonen), [pukkandan](https://github.com/pukkandan)
* [iwara] Add playlist extractors by [i6t](https://github.com/i6t)
* [Likee] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [masters] Add extractor by [m4tu4g](https://github.com/m4tu4g)
* [nebula] Add support for subscriptions by [hheimbuerger](https://github.com/hheimbuerger)
* [Podchaser] Add extractors by [connercsbn](https://github.com/connercsbn)
* [rokfin:search] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
* [rokfin:search] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
* [youtube] Add `:ytnotifications` extractor by [krichbanana](https://github.com/krichbanana)
* [youtube] Add YoutubeStoriesIE (`ytstories:<channel UCID>`) by [coletdjnz](https://github.com/coletdjnz)
* [ZingMp3] Add chart and user extractors by [hatienl0i261299](https://github.com/hatienl0i261299)
* [adn] Update AES key by [elyse0](https://github.com/elyse0)
* [adobepass] Allow cookies for authenticating MSO
* [bandcamp] Exclude merch links by [Yipten](https://github.com/Yipten)
* [chingari] Fix archiving and tests
* [DRTV] Improve `_VALID_URL` by [vertan](https://github.com/vertan)
* [facebook] Improve thumbnail extraction by [Wikidepia](https://github.com/Wikidepia)
* [fc2] Stop heartbeating once FFmpeg finishes by [Lesmiscore](https://github.com/Lesmiscore)
* [Gofile] Fix extraction and support password-protected links by [mehq](https://github.com/mehq)
* [hotstar, cleanup] Refactor extractors
* [InfoQ] Don't fail on missing audio format by [evansp](https://github.com/evansp)
* [Jamendo] Extract more metadata by [evansp](https://github.com/evansp)
* [kaltura] Update API calls by [flashdagger](https://github.com/flashdagger)
* [KhanAcademy] Fix extractor by [rand-net](https://github.com/rand-net)
* [LCI] Fix extractor by [MarwenDallel](https://github.com/MarwenDallel)
* [lrt] Support livestreams by [GiedriusS](https://github.com/GiedriusS)
* [niconico] Set `expected_protocol` to a public field
* [Niconico] Support 2FA by [ekangmonyet](https://github.com/ekangmonyet)
* [Olympics] Fix format extension
* [openrec:movie] Enable fallback for /movie/ URLs
* [PearVideo] Add fallback for formats by [hatienl0i261299](https://github.com/hatienl0i261299)
* [radiko] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [rai] Add `release_year`
* [reddit] Prevent infinite loop
* [rokfin] Implement login by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
* [ruutu] Support hs.fi embeds by [tpikonen](https://github.com/tpikonen), [pukkandan](https://github.com/pukkandan)
* [spotify] Detect iframe embeds by [fstirlitz](https://github.com/fstirlitz)
* [telegram] Fix metadata extraction
* [tmz, cleanup] Update tests by [diegorodriguezv](https://github.com/diegorodriguezv)
* [toggo] Fix `_VALID_URL` by [ca-za](https://github.com/ca-za)
* [trovo] Update to new API by [nyuszika7h](https://github.com/nyuszika7h)
* [TVer] Improve extraction by [Lesmiscore](https://github.com/Lesmiscore)
* [twitcasting] Pass headers for each format by [Lesmiscore](https://github.com/Lesmiscore)
* [VideocampusSachsen] Improve extractor by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
* [vimeo] Fix extractors
* [wat] Fix extraction of multi-language videos and subtitles by [elyse0](https://github.com/elyse0)
* [wistia] Fix `_VALID_URL` by [dirkf](https://github.com/dirkf)
* [youtube, cleanup] Minor refactoring by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [youtube] Add piped instance URLs by [JordanWeatherby](https://github.com/JordanWeatherby)
* [youtube] Deprioritize auto-generated thumbnails
* [youtube] Deprioritize format 22 (often damaged)
* [youtube] Fix episode metadata extraction
* [zee5] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
* [zingmp3, cleanup] Refactor extractors
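
The client-certificate options above have a Python API analogue. A minimal sketch, assuming the parameter names mirror the new CLI flags; all paths, the password and the URL are placeholders:

```python
import yt_dlp

# Hedged sketch: parameter names assumed to mirror the new CLI flags.
# Certificate paths, password and URL below are illustrative only.
opts = {
    'client_certificate': '/path/to/client.crt',
    'client_certificate_key': '/path/to/client.key',
    'client_certificate_password': 'hunter2',  # only for encrypted keys
}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://example.com/video'])
```
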
### 2022.04.08

* Use certificates from `certifi` if installed by [coletdjnz](https://github.com/coletdjnz)
* Treat multiple `--match-filters` as OR
* File locking improvements:
    * Do not lock downloading file on Windows
    * Do not prevent download if locking is unsupported
    * Do not truncate files before locking by [jakeogh](https://github.com/jakeogh), [pukkandan](https://github.com/pukkandan)
    * Fix non-blocking non-exclusive lock
* De-prioritize automatic-subtitles when no `--sub-lang` is given
* Exit after `--dump-user-agent`
* Fallback to video-only format when selecting by extension
* Fix `--abort-on-error` for subtitles
* Fix `--no-overwrite` for playlist infojson
* Fix `--print` with `--ignore-no-formats` when url is `None` by [flashdagger](https://github.com/flashdagger)
* Fix `--sleep-interval`
* Fix `--throttled-rate`
* Fix `autonumber`
* Fix case of `http_headers`
* Fix filepath sanitization in `--print-to-file`
* Handle float in `--wait-for-video`
* Ignore `mhtml` formats from `-f mergeall`
* Ignore format-specific fields in initial pass of `--match-filter`
* Protect stdout from unexpected progress and console-title
* Remove `Accept-Encoding` header from `std_headers` by [coletdjnz](https://github.com/coletdjnz)
* Remove incorrect warning for `--dateafter`
* Show warning when all media formats have DRM
* [downloader] Fix invocation of `HttpieFD`
* [http] Fix #3215
* [http] Reject broken range before request by [Lesmiscore](https://github.com/Lesmiscore), [Jules-A](https://github.com/Jules-A), [pukkandan](https://github.com/pukkandan)
* [fragment] Read downloaded fragments only when needed by [Lesmiscore](https://github.com/Lesmiscore)
* [http] Retry on more errors by [coletdjnz](https://github.com/coletdjnz)
* [mhtml] Fix fragments with absolute urls by [coletdjnz](https://github.com/coletdjnz)
* [extractor] Add `_perform_login` function (see the sketch below this section's list)
* [extractor] Allow control characters inside json
* [extractor] Support merging subtitles with data by [coletdjnz](https://github.com/coletdjnz)
* [generic] Extract subtitles from video.js by [Lesmiscore](https://github.com/Lesmiscore)
* [ffmpeg] Cache version data
* [FFmpegConcat] Ensure final directory exists
* [FfmpegMetadata] Write id3v1 tags
* [FFmpegVideoConvertor] Add more formats to `--remux-video`
* [FFmpegVideoConvertor] Ensure all streams are copied
* [MetadataParser] Validate outtmpl early
* [outtmpl] Fix replacement/default when used with alternate
* [outtmpl] Limit changes during sanitization
* [phantomjs] Fix bug
* [test] Add `test_locked_file`
* [utils] `format_decimal_suffix`: Fix for very large numbers by [s0u1h](https://github.com/s0u1h)
* [utils] `traverse_obj`: Allow filtering by value
* [utils] Add `filter_dict`, `get_first`, `try_call`
* [utils] ExtractorError: Fix for older Python versions
* [utils] WebSocketsWrapper: Allow omitting `__enter__` invocation by [Lesmiscore](https://github.com/Lesmiscore)
* [docs] Add an `.editorconfig` file by [fstirlitz](https://github.com/fstirlitz)
* [docs] Clarify the exact `BSD` license of dependencies by [MrRawes](https://github.com/MrRawes)
* [docs] Minor improvements by [pukkandan](https://github.com/pukkandan), [cffswb](https://github.com/cffswb), [danielyli](https://github.com/danielyli)
* [docs] Remove readthedocs
* [build] Add `requirements.txt` to pip distributions
* [cleanup, postprocessor] Create `_download_json`
* [cleanup, vimeo] Fix tests
* [cleanup] Misc fixes and minor cleanup
* [cleanup] Use `_html_extract_title`
* [AfreecaTV] Add `AfreecaTVUserIE` by [hatienl0i261299](https://github.com/hatienl0i261299)
* [arte] Add `format_note` to m3u8 formats
* [azmedien] Add TVO Online to supported hosts by [1-Byte](https://github.com/1-Byte)
* [BanBye] Add extractor by [mehq](https://github.com/mehq)
* [bilibili] Fix extraction of title with quotes by [dzek69](https://github.com/dzek69)
* [Craftsy] Add extractor by [Bricio](https://github.com/Bricio)
* [Cybrary] Add extractor by [aaearon](https://github.com/aaearon)
* [Huya] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [ITProTV] Add extractor by [aaearon](https://github.com/aaearon)
* [Jable] Add extractors by [mehq](https://github.com/mehq)
* [LastFM] Add extractors by [mehq](https://github.com/mehq)
* [Moviepilot] Add extractor by [panatexxa](https://github.com/panatexxa)
* [panopto] Add extractors by [coletdjnz](https://github.com/coletdjnz), [kmark](https://github.com/kmark)
* [PokemonSoundLibrary] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [WasdTV] Add extractor by [un-def](https://github.com/un-def), [hatienl0i261299](https://github.com/hatienl0i261299)
* [adobepass] Fix Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies)
* [afreecatv] Match new vod url by [wlritchi](https://github.com/wlritchi)
* [AZMedien] Support `tv.telezueri.ch` by [goggle](https://github.com/goggle)
* [BiliIntl] Support user-generated videos by [wlritchi](https://github.com/wlritchi)
* [BRMediathek] Fix VALID_URL
* [crunchyroll:playlist] Implement beta API by [tejing1](https://github.com/tejing1)
* [crunchyroll] Fix inheritance
* [daftsex] Fix extractor by [Soebb](https://github.com/Soebb)
* [dailymotion] Support `geo.dailymotion.com` by [hatienl0i261299](https://github.com/hatienl0i261299)
* [ellentube] Extract subtitles from manifest
* [elonet] Rewrite extractor by [Fam0r](https://github.com/Fam0r), [pukkandan](https://github.com/pukkandan)
* [fptplay] Fix metadata extraction by [hatienl0i261299](https://github.com/hatienl0i261299)
* [FranceCulture] Support playlists by [bohwaz](https://github.com/bohwaz)
* [go, viu] Extract subtitles from the m3u8 manifest by [fstirlitz](https://github.com/fstirlitz)
* [Imdb] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [MangoTV] Improve extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [Nebula] Fix bug in 52efa4b31200119adaa8acf33e50b84fcb6948f0
* [niconico] Fix extraction of thumbnails and uploader (#3266)
* [niconico] Rewrite NiconicoIE by [Lesmiscore](https://github.com/Lesmiscore)
* [nitter] Minor fixes and update instance list by [foghawk](https://github.com/foghawk)
* [NRK] Extract timestamp by [hatienl0i261299](https://github.com/hatienl0i261299)
* [openrec] Download archived livestreams by [Lesmiscore](https://github.com/Lesmiscore)
* [openrec] Refactor extractors by [Lesmiscore](https://github.com/Lesmiscore)
* [panopto] Improve subtitle extraction and support slides by [coletdjnz](https://github.com/coletdjnz)
* [ParamountPlus, CBS] Change VALID_URL by [Sipherdrakon](https://github.com/Sipherdrakon)
* [ParamountPlusSeries] Support multiple pages by [dodrian](https://github.com/dodrian)
* [Piapro] Extract description with break lines by [Lesmiscore](https://github.com/Lesmiscore)
* [rai] Fix extraction of http formats by [nixxo](https://github.com/nixxo)
* [rumble] Unescape title
* [RUTV] Fix format sorting by [Lesmiscore](https://github.com/Lesmiscore)
* [ruutu] Detect embeds by [tpikonen](https://github.com/tpikonen)
* [tenplay] Improve extractor by [aarubui](https://github.com/aarubui)
* [TikTok] Fix URLs with user id by [hatienl0i261299](https://github.com/hatienl0i261299)
* [TikTokVM] Fix redirect to user URL
* [TVer] Fix extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [TVer] Support landing page by [vvto33](https://github.com/vvto33)
* [twitcasting] Don't return multi_video for archive with single hls manifest by [Lesmiscore](https://github.com/Lesmiscore)
* [veo] Fix `_VALID_URL`
* [Veo] Fix extractor by [i6t](https://github.com/i6t)
* [viki] Don't attempt to modify URLs with signature by [nyuszika7h](https://github.com/nyuszika7h)
* [viu] Fix bypass for preview by [zackmark29](https://github.com/zackmark29)
* [viu] Fix extractor by [zackmark29](https://github.com/zackmark29), [pukkandan](https://github.com/pukkandan)
* [web.archive:youtube] Make CDX API requests non-fatal by [coletdjnz](https://github.com/coletdjnz)
* [wget] Fix proxy by [kikuyan](https://github.com/kikuyan), [coletdjnz](https://github.com/coletdjnz)
* [xnxx] Add `xnxx3.com` by [rozari0](https://github.com/rozari0)
* [youtube] **Add new age-gate bypass** by [zerodytrash](https://github.com/zerodytrash), [pukkandan](https://github.com/pukkandan)
* [youtube] Add extractor-arg to skip auto-translated subs
* [youtube] Avoid false positives when detecting damaged formats
* [youtube] Detect DRM better by [shirt](https://github.com/shirt-dev)
* [youtube] Fix auto-translated automatic captions
* [youtube] Fix pagination of `membership` tab
* [youtube] Fix uploader for collaborative playlists by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Improve video upload date handling by [coletdjnz](https://github.com/coletdjnz)
* [youtube:api] Prefer minified JSON response by [coletdjnz](https://github.com/coletdjnz)
* [youtube:search] Support hashtag entries by [coletdjnz](https://github.com/coletdjnz)
* [youtube:tab] Fix duration extraction for shorts by [coletdjnz](https://github.com/coletdjnz)
* [youtube:tab] Minor improvements
* [youtube:tab] Return shorts url if video is a short by [coletdjnz](https://github.com/coletdjnz)
* [Zattoo] Fix extractors by [goggle](https://github.com/goggle)
* [Zingmp3] Fix signature by [hatienl0i261299](https://github.com/hatienl0i261299)
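
For extractor authors, a minimal sketch of how the new `_perform_login` hook is meant to be used; the extractor, endpoint and payload below are hypothetical:

```python
from yt_dlp.extractor.common import InfoExtractor

class ExampleIE(InfoExtractor):  # hypothetical extractor
    _VALID_URL = r'https?://example\.com/video/(?P<id>\d+)'
    _NETRC_MACHINE = 'example'

    def _perform_login(self, username, password):
        # Invoked by the base class when credentials are available,
        # replacing ad-hoc login code in _real_initialize()
        self._download_webpage(
            'https://example.com/login', None, 'Logging in',
            data=f'user={username}&pass={password}'.encode())

    def _real_extract(self, url):
        video_id = self._match_id(url)
        return {
            'id': video_id,
            'title': f'Example video {video_id}',
            'url': f'https://example.com/media/{video_id}.mp4',
        }
```
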
### 2022.03.08.1

* [cleanup] Refactor `__init__.py`
* [build] Fix bug

### 2022.03.08

* Merge youtube-dl: Upto [commit/6508688](https://github.com/ytdl-org/youtube-dl/commit/6508688e88c83bb811653083db9351702cd39a6a) (except NDR)
* Add regex operator and quoting to format filters by [lukasfink1](https://github.com/lukasfink1)
* Add brotli content-encoding support by [coletdjnz](https://github.com/coletdjnz)
* Add pre-processor stage `after_filter`
* Better error message when no `--live-from-start` format
* Create necessary directories for `--print-to-file`
* Fill more fields for playlists by [Lesmiscore](https://github.com/Lesmiscore)
* Fix `-all` for `--sub-langs`
* Fix doubling of `video_id` in `ExtractorError`
* Fix for when stdout/stderr encoding is `None`
* Handle negative duration from extractor
* Implement `--add-header` without modifying `std_headers` (see the sketch below this section's list)
* Obey `--abort-on-error` for "ffmpeg not installed"
* Set `webpage_url_...` from `webpage_url` and not input URL
* Tolerate failure to `--write-link` due to unknown URL
* [aria2c] Add `--http-accept-gzip=true`
* [build] Update pyinstaller to 4.10 by [shirt](https://github.com/shirt-dev)
* [cookies] Update MacOS12 `Cookies.binarycookies` location by [mdpauley](https://github.com/mdpauley)
* [devscripts] Improve `prepare_manpage`
* [downloader] Do not use aria2c for non-native `m3u8`
* [downloader] Obey `--file-access-retries` when deleting/renaming by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb)
* [extractor] Allow `http_headers` to be specified for `thumbnails`
* [extractor] Extract subtitles from manifests for vimeo, globo, kaltura, svt by [fstirlitz](https://github.com/fstirlitz)
* [extractor] Fix for manifests without period duration by [dirkf](https://github.com/dirkf), [pukkandan](https://github.com/pukkandan)
* [extractor] Support `--mark-watched` without `_NETRC_MACHINE` by [coletdjnz](https://github.com/coletdjnz)
* [FFmpegConcat] Abort on `--simulate`
* [FormatSort] Consider `acodec`=`ogg` as `vorbis`
* [fragment] Fix bugs around resuming with Range by [Lesmiscore](https://github.com/Lesmiscore)
* [fragment] Improve `--live-from-start` for YouTube livestreams by [Lesmiscore](https://github.com/Lesmiscore)
* [generic] Pass referer to extracted formats
* [generic] Set rss `guid` as video id by [Bricio](https://github.com/Bricio)
* [options] Better ambiguous option resolution
* [options] Rename `--clean-infojson` to `--clean-info-json`
* [SponsorBlock] Fixes for highlight and "full video labels" by [nihil-admirari](https://github.com/nihil-admirari)
* [Sponsorblock] Minor fixes by [nihil-admirari](https://github.com/nihil-admirari)
* [utils] Better traceback for `ExtractorError`
* [utils] Fix file locking for AOSP by [jakeogh](https://github.com/jakeogh)
* [utils] Improve file locking
* [utils] OnDemandPagedList: Do not download pages after error
* [utils] render_table: Fix character calculation for removing extra gap by [Lesmiscore](https://github.com/Lesmiscore)
* [utils] Use `locked_file` for `sanitize_open` by [jakeogh](https://github.com/jakeogh)
* [utils] Validate `DateRange` input
* [utils] WebSockets wrapper for non-async functions by [Lesmiscore](https://github.com/Lesmiscore)
* [cleanup] Don't pass protocol to `_extract_m3u8_formats` for live videos
* [cleanup] Remove extractors for some dead websites by [marieell](https://github.com/marieell)
* [cleanup, docs] Misc cleanup
* [AbemaTV] Add extractors by [Lesmiscore](https://github.com/Lesmiscore)
* [adobepass] Add Suddenlink MSO by [CplPwnies](https://github.com/CplPwnies)
* [ant1newsgr] Add extractor by [zmousm](https://github.com/zmousm)
* [bigo] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [Caltrans] Add extractor by [Bricio](https://github.com/Bricio)
* [daystar] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [fc2:live] Add extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [fptplay] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [murrtube] Add extractor by [cyberfox1691](https://github.com/cyberfox1691)
* [nfb] Add extractor by [ofkz](https://github.com/ofkz)
* [niconico] Add playlist extractors and refactor by [Lesmiscore](https://github.com/Lesmiscore)
* [peekvids] Add extractor by [schn0sch](https://github.com/schn0sch)
* [piapro] Add extractor by [pycabbage](https://github.com/pycabbage), [Lesmiscore](https://github.com/Lesmiscore)
* [rokfin] Add extractor by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
* [rokfin] Add stack and channel extractors by [P-reducible](https://github.com/P-reducible), [pukkandan](https://github.com/pukkandan)
* [ruv.is] Add extractor by [iw0nderhow](https://github.com/iw0nderhow)
* [telegram] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [VideocampusSachsen] Add extractors by [FestplattenSchnitzel](https://github.com/FestplattenSchnitzel)
* [xinpianchang] Add extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [abc] Support 1080p by [Ronnnny](https://github.com/Ronnnny)
* [afreecatv] Support password-protected livestreams by [wlritchi](https://github.com/wlritchi)
* [ard] Fix valid URL
* [ATVAt] Detect geo-restriction by [marieell](https://github.com/marieell)
* [bandcamp] Detect acodec
* [bandcamp] Fix user URLs by [lyz-code](https://github.com/lyz-code)
* [bbc] Fix extraction of news articles by [ajj8](https://github.com/ajj8)
* [beeg] Fix extractor by [Bricio](https://github.com/Bricio)
* [bigo] Fix extractor to not use `form_params`
* [Bilibili] Pass referer for all formats by [blackgear](https://github.com/blackgear)
* [Biqle] Fix extractor by [Bricio](https://github.com/Bricio)
* [ccma] Fix timestamp parsing by [nyuszika7h](https://github.com/nyuszika7h)
* [crunchyroll] Better error reporting on login failure by [tejing1](https://github.com/tejing1)
* [cspan] Support C-Span congress videos by [Grabien](https://github.com/Grabien)
* [dropbox] Fix regex by [zenerdi0de](https://github.com/zenerdi0de)
* [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
* [fujitv] Extract resolution for free sources by [YuenSzeHong](https://github.com/YuenSzeHong)
* [Gettr] Add `GettrStreamingIE` by [i6t](https://github.com/i6t)
* [Gettr] Fix formats order by [i6t](https://github.com/i6t)
* [Gettr] Improve extractor by [i6t](https://github.com/i6t)
* [globo] Expand valid URL by [Bricio](https://github.com/Bricio)
* [lbry] Fix `--ignore-no-formats-error`
* [manyvids] Extract `uploader` by [regarten](https://github.com/regarten)
* [mildom] Fix linter
* [mildom] Rework extractors by [Lesmiscore](https://github.com/Lesmiscore)
* [mirrativ] Cleanup extractor code by [Lesmiscore](https://github.com/Lesmiscore)
* [nhk] Add support for NHK for School by [Lesmiscore](https://github.com/Lesmiscore)
* [niconico:tag] Add support for searching tags
* [nrk] Add fallback API
* [peekvids] Use JSON-LD by [schn0sch](https://github.com/schn0sch)
* [peertube] Add media.fsfe.org by [mxmehl](https://github.com/mxmehl)
* [rtvs] Fix extractor by [Bricio](https://github.com/Bricio)
* [spiegel] Fix `_VALID_URL`
* [ThumbnailsConvertor] Support `webp`
* [tiktok] Fix `vm.tiktok`/`vt.tiktok` URLs
* [tubitv] Fix/improve TV series extraction by [bbepis](https://github.com/bbepis)
* [tumblr] Fix extractor by [foghawk](https://github.com/foghawk)
* [twitcasting] Add fallback for finding running live by [Lesmiscore](https://github.com/Lesmiscore)
* [TwitCasting] Check for password protection by [Lesmiscore](https://github.com/Lesmiscore)
* [twitcasting] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
* [twitch] Fix field name of `view_count`
* [twitter] Fix for private videos by [iphoting](https://github.com/iphoting)
* [washingtonpost] Fix extractor by [Bricio](https://github.com/Bricio)
* [youtube:tab] Add `approximate_date` extractor-arg
* [youtube:tab] Follow redirect to regional channel by [coletdjnz](https://github.com/coletdjnz)
* [youtube:tab] Reject webpage data if redirected to home page
* [youtube] De-prioritize potentially damaged formats
* [youtube] Differentiate descriptive audio by language code
* [youtube] Ensure subtitle urls are absolute by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Escape possible `$` in `_extract_n_function_name` regex by [Lesmiscore](https://github.com/Lesmiscore)
* [youtube] Fix automatic captions
* [youtube] Fix n-sig extraction for phone player JS by [MinePlayersPE](https://github.com/MinePlayersPE)
* [youtube] Further de-prioritize 3gp format
* [youtube] Label original auto-subs
* [youtube] Prefer UTC upload date for videos by [coletdjnz](https://github.com/coletdjnz)
* [zaq1] Remove dead extractor by [marieell](https://github.com/marieell)
* [zee5] Support web-series by [Aniruddh-J](https://github.com/Aniruddh-J)
* [zingmp3] Fix extractor by [hatienl0i261299](https://github.com/hatienl0i261299)
* [zoom] Add support for screen cast by [Mipsters](https://github.com/Mipsters)
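
Since `--add-header` no longer mutates `std_headers`, per-instance headers can also be illustrated through the embedding API. A sketch, assuming the `http_headers` parameter; the header value and URL are placeholders:

```python
import yt_dlp

# Hedged sketch: `http_headers` assumed to carry per-instance headers
# without touching the global std_headers. Values are illustrative.
opts = {'http_headers': {'Referer': 'https://example.com/'}}
with yt_dlp.YoutubeDL(opts) as ydl:
    info = ydl.extract_info('https://example.com/video', download=False)
    print(info.get('title'))
```
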
### 2022.02.04

* [youtube:search] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
* [youtube:search] Add tests
* [twitcasting] Enforce UTF-8 for POST payload by [Lesmiscore](https://github.com/Lesmiscore)
* [mediaset] Fix extractor by [nixxo](https://github.com/nixxo)
* [websocket] Make syntax error in `websockets` module non-fatal

### 2022.02.03

* Merge youtube-dl: Upto [commit/78ce962](https://github.com/ytdl-org/youtube-dl/commit/78ce962f4fe020994c216dd2671546fbe58a5c67)
* Add option `--print-to-file`
* Make nested `--config-locations` relative to parent file
* Ensure `_type` is present in `info.json`
* Fix `--compat-options list-formats`
* Fix/improve `InAdvancePagedList`
* [downloader/ffmpeg] Handle unknown formats better
* [outtmpl] Handle `-o ""` better
* [outtmpl] Handle hard-coded file extension better
* [extractor] Add convenience function `_yes_playlist`
* [extractor] Allow non-fatal `title` extraction
* [extractor] Extract video inside `Article` json_ld
* [generic] Allow further processing of json_ld URL
* [cookies] Fix keyring selection for unsupported desktops
* [utils] Strip double spaces in `clean_html` by [dirkf](https://github.com/dirkf)
* [aes] Add `unpad_pkcs7` (see the sketch below this section's list)
* [test] Fix `test_youtube_playlist_noplaylist`
* [docs,cleanup] Misc cleanup
* [dplay] Add extractors for site changes by [Sipherdrakon](https://github.com/Sipherdrakon)
* [ertgr] Add extractors by [zmousm](https://github.com/zmousm), [dirkf](https://github.com/dirkf)
* [Musicdex] Add extractors by [Ashish0804](https://github.com/Ashish0804)
* [YandexVideoPreview] Add extractor by [KiberInfinity](https://github.com/KiberInfinity)
* [youtube] Add extractor `YoutubeMusicSearchURLIE`
* [archive.org] Ignore unnecessary files
* [Bilibili] Add 8k support by [u-spec-png](https://github.com/u-spec-png)
* [bilibili] Fix extractor, make anthology title non-fatal
* [CAM4] Add thumbnail extraction by [alerikaisattera](https://github.com/alerikaisattera)
* [cctv] De-prioritize sample format
* [crunchyroll:beta] Add cookies support by [tejing1](https://github.com/tejing1)
* [crunchyroll] Fix login by [tejing1](https://github.com/tejing1)
* [doodstream] Fix extractor
* [fc2] Fix extraction by [Lesmiscore](https://github.com/Lesmiscore)
* [FFmpegConcat] Abort on `--skip-download` and download errors
* [Fujitv] Extract metadata and support premium by [YuenSzeHong](https://github.com/YuenSzeHong)
* [globo] Fix extractor by [Bricio](https://github.com/Bricio)
* [glomex] Simplify embed detection
* [GoogleSearch] Fix extractor
* [Instagram] Fix extraction when logged in by [MinePlayersPE](https://github.com/MinePlayersPE)
* [iq.com] Add VIP support by [MinePlayersPE](https://github.com/MinePlayersPE)
* [mildom] Fix extractor by [lazypete365](https://github.com/lazypete365)
* [MySpass] Fix video url processing by [trassshhub](https://github.com/trassshhub)
* [Odnoklassniki] Improve embedded players extraction by [KiberInfinity](https://github.com/KiberInfinity)
* [orf:tvthek] Lazy playlist extraction and obey `--no-playlist`
* [Pladform] Fix redirection to external player by [KiberInfinity](https://github.com/KiberInfinity)
* [ThisOldHouse] Improve Premium URL check by [Ashish0804](https://github.com/Ashish0804)
* [TikTok] Iterate through app versions by [MinePlayersPE](https://github.com/MinePlayersPE)
* [tumblr] Fix 403 errors and handle vimeo embeds by [foghawk](https://github.com/foghawk)
* [viki] Fix "Bad request" for manifest by [nyuszika7h](https://github.com/nyuszika7h)
* [Vimm] Add recording extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [web.archive:youtube] Add `ytarchive:` prefix and misc cleanup
* [youtube:api] Do not use seek when reading HTTPError response by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Fix n-sig for player e06dea74
* [youtube, cleanup] Misc fixes and cleanup
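
A tiny sketch of the new `unpad_pkcs7` helper mentioned above; the input bytes are illustrative:

```python
from yt_dlp.aes import unpad_pkcs7

# PKCS#7: each pad byte equals the pad length, so unpadding just
# slices that many bytes off the end of the buffer.
assert unpad_pkcs7(b'secret\x02\x02') == b'secret'
```
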
### 2022.01.21

* Add option `--concat-playlist` to **concat videos in a playlist**
* Allow **multiple and nested configuration files**
* Add more post-processing stages (`after_video`, `playlist`)
* Allow `--exec` to be run at any post-processing stage (Deprecates `--exec-before-download`)
* Allow `--print` to be run at any post-processing stage
* Allow listing formats, thumbnails, subtitles using `--print` by [pukkandan](https://github.com/pukkandan), [Zirro](https://github.com/Zirro)
* Add fields `video_autonumber`, `modified_date`, `modified_timestamp`, `playlist_count`, `channel_follower_count`
* Add key `requested_downloads` in the root `info_dict`
* Write `download_archive` only after all formats are downloaded
* [FfmpegMetadata] Allow setting metadata of individual streams using `meta<n>_` prefix
* Add option `--legacy-server-connect` by [xtkoba](https://github.com/xtkoba)
* Allow escaped `,` in `--extractor-args`
* Allow unicode characters in `info.json`
* Check for existing thumbnail/subtitle in final directory
* Don't treat empty containers as `None` in `sanitize_info`
* Fix `-s --ignore-no-formats --force-write-archive`
* Fix live title for multiple formats
* List playlist thumbnails in `--list-thumbnails`
* Raise error if subtitle download fails
* [cookies] Fix bug when keyring is unspecified
* [ffmpeg] Ignore unknown streams, standardize use of `-map 0`
* [outtmpl] Alternate form for `D` and fix suffix's case
* [utils] Add `Sec-Fetch-Mode` to `std_headers`
* [utils] Fix `format_bytes` output for Bytes by [pukkandan](https://github.com/pukkandan), [mdawar](https://github.com/mdawar)
* [utils] Handle `ss:xxx` in `parse_duration` (see the sketch below this section's list)
* [utils] Improve parsing for nested HTML elements by [zmousm](https://github.com/zmousm), [pukkandan](https://github.com/pukkandan)
* [utils] Use key `None` in `traverse_obj` to return as-is
* [extractor] Detect more subtitle codecs in MPD manifests by [fstirlitz](https://github.com/fstirlitz)
* [extractor] Extract chapters from JSON-LD by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
* [extractor] Extract thumbnails from JSON-LD by [nixxo](https://github.com/nixxo)
* [extractor] Improve `url_result` and related
* [generic] Improve KVS player extraction by [trassshhub](https://github.com/trassshhub)
* [build] Reduce dependency on third party workflows
* [extractor,cleanup] Use `_search_nextjs_data`, `format_field`
* [cleanup] Minor fixes and cleanup
* [docs] Improvements
* [test] Fix TestVerboseOutput
* [afreecatv] Add livestreams extractor by [wlritchi](https://github.com/wlritchi)
* [callin] Add extractor by [foghawk](https://github.com/foghawk)
* [CrowdBunker] Add extractors by [Ashish0804](https://github.com/Ashish0804)
* [daftsex] Add extractors by [k3ns1n](https://github.com/k3ns1n)
* [digitalconcerthall] Add extractor by [teridon](https://github.com/teridon)
* [Drooble] Add extractor by [u-spec-png](https://github.com/u-spec-png)
* [EuropeanTour] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [iq.com] Add extractors by [MinePlayersPE](https://github.com/MinePlayersPE)
* [KelbyOne] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [LnkIE] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [MainStreaming] Add extractor by [coletdjnz](https://github.com/coletdjnz)
* [megatvcom] Add extractors by [zmousm](https://github.com/zmousm)
* [Newsy] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [noodlemagazine] Add extractor by [trassshhub](https://github.com/trassshhub)
* [PokerGo] Add extractors by [Ashish0804](https://github.com/Ashish0804)
* [Pornez] Add extractor by [mozlima](https://github.com/mozlima)
* [PRX] Add Extractors by [coletdjnz](https://github.com/coletdjnz)
* [RTNews] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [Rule34video] Add extractor by [trassshhub](https://github.com/trassshhub)
* [tvopengr] Add extractors by [zmousm](https://github.com/zmousm)
* [Vimm] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [glomex] Add extractors by [zmousm](https://github.com/zmousm)
* [instagram] Add story/highlight extractor by [u-spec-png](https://github.com/u-spec-png)
* [openrec] Add movie extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [rai] Add Raiplaysound extractors by [nixxo](https://github.com/nixxo), [pukkandan](https://github.com/pukkandan)
* [aparat] Fix extractor
* [ard] Extract subtitles by [fstirlitz](https://github.com/fstirlitz)
* [BiliIntl] Add login by [MinePlayersPE](https://github.com/MinePlayersPE)
* [CeskaTelevize] Use `http` for manifests
* [CTVNewsIE] Add fallback for video search by [Ashish0804](https://github.com/Ashish0804)
* [dplay] Migrate DiscoveryPlusItaly to DiscoveryPlus by [timendum](https://github.com/timendum)
* [dplay] Re-structure DiscoveryPlus extractors
* [Dropbox] Support password-protected files and more formats by [zenerdi0de](https://github.com/zenerdi0de)
* [facebook] Fix extraction from groups
* [facebook] Improve title and uploader extraction
* [facebook] Parse dash manifests
* [fox] Extract m3u8 from preview by [ischmidt20](https://github.com/ischmidt20)
* [funk] Support origin URLs
* [gfycat] Fix `uploader`
* [gfycat] Support embeds by [coletdjnz](https://github.com/coletdjnz)
* [hotstar] Add extractor args to ignore tags by [Ashish0804](https://github.com/Ashish0804)
* [hrfernsehen] Fix ardloader extraction by [CreaValix](https://github.com/CreaValix)
* [instagram] Fix username extraction for stories and highlights by [nyuszika7h](https://github.com/nyuszika7h)
* [kakao] Detect geo-restriction
* [line] Remove `tv.line.me` by [sian1468](https://github.com/sian1468)
* [mixch] Add `MixchArchiveIE` by [Lesmiscore](https://github.com/Lesmiscore)
* [mixcloud] Detect restrictions by [llacb47](https://github.com/llacb47)
* [NBCSports] Fix extraction of platform URLs by [ischmidt20](https://github.com/ischmidt20)
* [Nexx] Extract more metadata by [MinePlayersPE](https://github.com/MinePlayersPE)
* [Nexx] Support 3q CDN by [MinePlayersPE](https://github.com/MinePlayersPE)
* [pbs] De-prioritize AD formats
* [PornHub,YouTube] Refresh onion addresses by [unit193](https://github.com/unit193)
* [RedBullTV] Parse subtitles from manifest by [Ashish0804](https://github.com/Ashish0804)
* [streamcz] Fix extractor by [arkamar](https://github.com/arkamar), [pukkandan](https://github.com/pukkandan)
* [Ted] Rewrite extractor by [pukkandan](https://github.com/pukkandan), [trassshhub](https://github.com/trassshhub)
* [Theta] Fix valid URL by [alerikaisattera](https://github.com/alerikaisattera)
* [ThisOldHouseIE] Add support for premium videos by [Ashish0804](https://github.com/Ashish0804)
* [TikTok] Fix extraction for sigi-based webpages, add API fallback by [MinePlayersPE](https://github.com/MinePlayersPE)
* [TikTok] Pass cookies to formats, and misc fixes by [MinePlayersPE](https://github.com/MinePlayersPE)
* [TikTok] Extract captions, user thumbnail by [MinePlayersPE](https://github.com/MinePlayersPE)
* [TikTok] Change app version by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47)
* [TVer] Extract message for unaired live by [Lesmiscore](https://github.com/Lesmiscore)
* [twitcasting] Refactor extractor by [Lesmiscore](https://github.com/Lesmiscore)
* [twitter] Fix video in quoted tweets
* [veoh] Improve extractor by [foghawk](https://github.com/foghawk)
* [vk] Capture `clip` URLs
* [vk] Fix VKUserVideosIE by [Ashish0804](https://github.com/Ashish0804)
* [vk] Improve `_VALID_URL` by [k3ns1n](https://github.com/k3ns1n)
* [VrtNU] Handle empty title by [pgaig](https://github.com/pgaig)
* [XVideos] Check HLS formats by [MinePlayersPE](https://github.com/MinePlayersPE)
* [yahoo:gyao] Improve playlist handling by [hyano](https://github.com/hyano)
* [youtube:tab] Extract more playlist metadata by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [youtube:tab] Raise error on tab redirect by [krichbanana](https://github.com/krichbanana), [coletdjnz](https://github.com/coletdjnz)
* [youtube] Update Innertube clients by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Detect live-stream embeds
* [youtube] Do not return `upload_date` for playlists
* [youtube] Extract channel subscriber count by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Make invalid storyboard URL non-fatal
* [youtube] Enforce UTC, update innertube clients and tests by [coletdjnz](https://github.com/coletdjnz)
* [zdf] Add chapter extraction by [iw0nderhow](https://github.com/iw0nderhow)
* [zee5] Add geo-bypass
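
As a reminder of what `parse_duration` accepts (the entry above extends it to `ss:xxx`-style inputs), a small sketch; the exact handling of the new form is as implemented in `utils`:

```python
from yt_dlp.utils import parse_duration

# Human-readable durations are normalized to seconds (float)
print(parse_duration('1:02:03'))  # 3723.0
print(parse_duration('3 min'))    # 180.0
```
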
### 2021.12.27

* Avoid recursion error when re-extracting info
* [ffmpeg] Fix position of `--ppa`
* [aria2c] Don't show progress when `--no-progress`
* [cookies] Support other keyrings by [mbway](https://github.com/mbway)
* [EmbedThumbnail] Prefer AtomicParsley over ffmpeg if available
* [generic] Fix HTTP KVS Player by [git-anony-mouse](https://github.com/git-anony-mouse)
* [ThumbnailsConvertor] Fix for when there are no thumbnails
* [docs] Add examples for using `TYPES:` in `-P`/`-o` (see the sketch below this section's list)
* [PixivSketch] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
* [tiktok] Add music, sticker and tag IEs by [MinePlayersPE](https://github.com/MinePlayersPE)
* [BiliIntl] Fix extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
* [CBC] Fix URL regex
* [tiktok] Fix `extractor_key` used in archive
* [youtube] **End `live-from-start` properly when stream ends with 403**
* [Zee5] Fix VALID_URL for tv-shows by [Ashish0804](https://github.com/Ashish0804)
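
The `TYPES:` examples referenced above have an API analogue: per-type dictionaries for `paths`/`outtmpl`. A sketch; the paths, templates and URL are illustrative:

```python
import yt_dlp

# Hedged sketch: per-type output paths/templates, the API analogue of
# `-P TYPES:PATH` / `-o TYPES:TEMPLATE`. All values are placeholders.
opts = {
    'paths': {'home': '/media/videos', 'temp': '/tmp/yt-dlp'},
    'outtmpl': {
        'default': '%(title)s.%(ext)s',
        'thumbnail': 'thumbs/%(title)s.%(ext)s',
    },
}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://example.com/video'])
```
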
### 2021.12.25

* [dash,youtube] **Download live from start to end** by [nao20010128nao](https://github.com/nao20010128nao), [pukkandan](https://github.com/pukkandan)
    * Add option `--live-from-start` to enable downloading live videos from start (see the sketch below this section's list)
    * Add key `is_from_start` in formats to identify formats (of live videos) that download from start
    * [dash] Create protocol `http_dash_segments_generator` that allows a function to be passed instead of fragments
    * [fragment] Allow multiple live dash formats to download simultaneously
    * [youtube] Implement fragment re-fetching for the live dash formats
    * [youtube] Re-extract dash manifest every 5 hours (manifest expires in 6hrs)
    * [postprocessor/ffmpeg] Add `FFmpegFixupDuplicateMoovPP` to fixup duplicated moov atoms
    * Known issues:
        * Ctrl+C doesn't work on Windows when downloading multiple formats
        * If video becomes private, download hangs
* [SponsorBlock] Add `Filler` and `Highlight` categories by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan)
    * Change `--sponsorblock-cut all` to `--sponsorblock-cut default` if you do not want filler sections to be removed
* Add field `webpage_url_domain`
* Add interactive format selection with `-f -`
* Add option `--file-access-retries` by [ehoogeveen-medweb](https://github.com/ehoogeveen-medweb)
* [outtmpl] Add alternate forms `S`, `D` and improve `id` detection
* [outtmpl] Add operator `&` for replacement text by [PilzAdam](https://github.com/PilzAdam)
* [EmbedSubtitle] Disable duration check temporarily
* [extractor] Add `_search_nuxt_data` by [nao20010128nao](https://github.com/nao20010128nao)
* [extractor] Ignore errors in comment extraction when `-i` is given
* [extractor] Standardize `_live_title`
* [FormatSort] Prevent incorrect deprecation warning
* [generic] Extract m3u8 formats from JSON-LD
* [postprocessor/ffmpeg] Always add `faststart`
* [utils] Fix parsing `YYYYMMDD` dates in Nov/Dec by [wlritchi](https://github.com/wlritchi)
* [utils] Improve `parse_count`
* [utils] Update `std_headers` by [kikuyan](https://github.com/kikuyan), [fstirlitz](https://github.com/fstirlitz)
* [lazy_extractors] Fix for search IEs
* [extractor] Support default implicit graph in JSON-LD by [zmousm](https://github.com/zmousm)
* Allow `--no-write-thumbnail` to override `--write-all-thumbnails`
* Fix `--throttled-rate`
* Fix control characters being printed to `--console-title`
* Fix PostProcessor hooks not registered for some PPs
* Pre-process when using `--flat-playlist`
* Remove known invalid thumbnails from `info_dict`
* Add warning when using `-f best`
* Use `parse_duration` for `--wait-for-video` and some minor fixes
* [test/download] Add more fields
* [test/download] Ignore field `webpage_url_domain` by [std-move](https://github.com/std-move)
* [compat] Suppress errors in enabling VT mode
* [docs] Improve manpage format by [iw0nderhow](https://github.com/iw0nderhow), [pukkandan](https://github.com/pukkandan)
* [docs,cleanup] Minor fixes and cleanup
* [cleanup] Fix some typos by [unit193](https://github.com/unit193)
* [ABC:iview] Add show extractor by [pabs3](https://github.com/pabs3)
* [dropout] Add extractor by [TwoThousandHedgehogs](https://github.com/TwoThousandHedgehogs), [pukkandan](https://github.com/pukkandan)
* [GameJolt] Add extractors by [MinePlayersPE](https://github.com/MinePlayersPE)
* [gofile] Add extractor by [Jertzukka](https://github.com/Jertzukka), [Ashish0804](https://github.com/Ashish0804)
* [hse] Add extractors by [cypheron](https://github.com/cypheron), [pukkandan](https://github.com/pukkandan)
* [NateTV] Add NateIE and NateProgramIE by [Ashish0804](https://github.com/Ashish0804), [Hyeeji](https://github.com/Hyeeji)
* [OpenCast] Add extractors by [bwildenhain](https://github.com/bwildenhain), [C0D3D3V](https://github.com/C0D3D3V)
* [rtve] Add `RTVEAudioIE` by [kebianizao](https://github.com/kebianizao)
* [Rutube] Add RutubeChannelIE by [Ashish0804](https://github.com/Ashish0804)
* [skeb] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
* [soundcloud] Add related tracks extractor by [Lapin0t](https://github.com/Lapin0t)
* [toggo] Add extractor by [nyuszika7h](https://github.com/nyuszika7h)
* [TrueID] Add extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
* [audiomack] Update album and song VALID_URL by [abdullah-if](https://github.com/abdullah-if), [dirkf](https://github.com/dirkf)
* [CBC Gem] Extract 1080p formats by [DavidSkrundz](https://github.com/DavidSkrundz)
* [ceskatelevize] Fetch iframe from nextJS data by [mkubecek](https://github.com/mkubecek)
* [crackle] Look for non-DRM formats by [raleeper](https://github.com/raleeper)
* [dplay] Temporary fix for `discoveryplus.com/it`
* [DiscoveryPlusShowBaseIE] Yield actual video id by [Ashish0804](https://github.com/Ashish0804)
* [Facebook] Handle redirect URLs
* [fujitv] Extract 1080p from `tv_android` m3u8 by [YuenSzeHong](https://github.com/YuenSzeHong)
* [gronkh] Support new URL pattern by [Sematre](https://github.com/Sematre)
* [instagram] Expand valid URL by [u-spec-png](https://github.com/u-spec-png)
* [Instagram] Try bypassing login wall with embed page by [MinePlayersPE](https://github.com/MinePlayersPE)
* [Jamendo] Fix use of `_VALID_URL_RE` by [jaller94](https://github.com/jaller94)
* [LBRY] Support livestreams by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
* [NJPWWorld] Extract formats from m3u8 by [aarubui](https://github.com/aarubui)
* [NovaEmbed] Update player regex by [std-move](https://github.com/std-move)
* [npr] Make SMIL extraction non-fatal by [r5d](https://github.com/r5d)
* [ntvcojp] Extract NUXT data by [nao20010128nao](https://github.com/nao20010128nao)
* [ok.ru] Add mobile fallback by [nao20010128nao](https://github.com/nao20010128nao)
* [olympics] Add uploader and cleanup by [u-spec-png](https://github.com/u-spec-png)
* [ondemandkorea] Update `jw_config` regex by [julien-hadleyjack](https://github.com/julien-hadleyjack)
* [PlutoTV] Expand `_VALID_URL`
* [RaiNews] Fix extractor by [nixxo](https://github.com/nixxo)
* [RCTIPlusSeries] Lazy extraction and video type selection by [MinePlayersPE](https://github.com/MinePlayersPE)
* [redtube] Handle formats delivered inside a JSON by [dirkf](https://github.com/dirkf), [nixxo](https://github.com/nixxo)
* [SonyLiv] Add OTP login support by [Ashish0804](https://github.com/Ashish0804)
* [Steam] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [TikTok] Pass cookies to mobile API by [MinePlayersPE](https://github.com/MinePlayersPE)
* [trovo] Fix inheritance of `TrovoChannelBaseIE`
* [TVer] Extract better thumbnails by [YuenSzeHong](https://github.com/YuenSzeHong)
* [vimeo] Extract chapters
* [web.archive:youtube] Improve metadata extraction by [coletdjnz](https://github.com/coletdjnz)
* [youtube:comments] Add more options for limiting number of comments extracted by [coletdjnz](https://github.com/coletdjnz)
* [youtube:tab] Extract more metadata from feeds/channels/playlists by [coletdjnz](https://github.com/coletdjnz)
* [youtube:tab] Extract video thumbnails from playlist by [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [youtube:tab] Ignore query when redirecting channel to playlist and cleanup of related code
* [youtube] Fix `ytsearchdate`
* [zdf] Support videos with different ptmd location by [iw0nderhow](https://github.com/iw0nderhow)
* [zee5] Support /episodes in URL
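
A minimal sketch of the headline `--live-from-start` feature through the embedding API, assuming the `live_from_start` parameter; the URL is a placeholder:

```python
import yt_dlp

# Hedged sketch: download an in-progress live stream from its beginning
# (YouTube/DASH only, per the notes above). URL is illustrative.
opts = {'live_from_start': True}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://www.youtube.com/watch?v=example'])
```
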
### 2021.12.01

* **Add option `--wait-for-video` to wait for scheduled streams**
* Add option `--break-per-input` to apply `--break-on...` to each input URL
* Add option `--embed-info-json` to embed info.json in mkv
* Add compat-option `embed-metadata`
* Allow using a custom format selector through API (see the sketch below this section's list)
* [AES] Add ECB mode by [nao20010128nao](https://github.com/nao20010128nao)
* [build] Fix MacOS Build
* [build] Save Git HEAD at release alongside version info
* [build] Use `workflow_dispatch` for release
* [downloader/ffmpeg] Fix for direct videos inside mpd manifests
* [downloader] Add colors to download progress
* [EmbedSubtitles] Slightly relax duration check and related cleanup
* [ExtractAudio] Fix conversion to `wav` and `vorbis`
* [ExtractAudio] Support `alac`
* [extractor] Extract `average_rating` from JSON-LD
* [FixupM3u8] Fixup MPEG-TS in MP4 container
* [generic] Support mpd manifests without extension by [shirt](https://github.com/shirt-dev)
* [hls] Better FairPlay DRM detection by [nyuszika7h](https://github.com/nyuszika7h)
* [jsinterp] Fix splice to handle float (for youtube js player f1ca6900)
* [utils] Allow alignment in `render_table` and add tests
* [utils] Fix `PagedList`
* [utils] Fix error when copying `LazyList`
* Clarify video/audio-only formats in `-F`
* Ensure directory exists when checking formats
* Ensure path for link files exists by [Zirro](https://github.com/Zirro)
* Ensure same config file is not loaded multiple times
* Fix `postprocessor_hooks`
* Fix `--break-on-archive` when pre-checking
* Fix `--check-formats` for `mhtml`
* Fix `--load-info-json` of playlists with failed entries
* Fix `--trim-filename` when filename has `.`
* Fix bug in parsing `--add-header`
* Fix error in `report_unplayable_conflict` by [shirt](https://github.com/shirt-dev)
* Fix writing playlist infojson with `--no-clean-infojson`
* Validate `--get-bypass-country`
* [blogger] Add extractor by [pabs3](https://github.com/pabs3)
* [breitbart] Add extractor by [Grabien](https://github.com/Grabien)
* [CableAV] Add extractor by [j54vc1bk](https://github.com/j54vc1bk)
* [CanalAlpha] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [CozyTV] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [CPTwentyFour] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [DiscoveryPlus] Add `DiscoveryPlusItalyShowIE` by [Ashish0804](https://github.com/Ashish0804)
* [ESPNCricInfo] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [LinkedIn] Add extractor by [u-spec-png](https://github.com/u-spec-png)
* [mixch] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
* [nebula] Add `NebulaCollectionIE` and rewrite extractor by [hheimbuerger](https://github.com/hheimbuerger)
* [OneFootball] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [peer.tv] Add extractor by [u-spec-png](https://github.com/u-spec-png)
* [radiozet] Add extractor by [0xA7404A](https://github.com/0xA7404A) (Aurora)
* [redgifs] Add extractor by [chio0hai](https://github.com/chio0hai)
* [RedGifs] Add Search and User extractors by [Deer-Spangle](https://github.com/Deer-Spangle)
* [rtrfm] Add extractor by [pabs3](https://github.com/pabs3)
* [Streamff] Add extractor by [cntrl-s](https://github.com/cntrl-s)
* [Stripchat] Add extractor by [zulaport](https://github.com/zulaport)
* [Aljazeera] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [AmazonStoreIE] Fix regex to not match vdp urls by [Ashish0804](https://github.com/Ashish0804)
* [ARDBetaMediathek] Handle new URLs
* [bbc] Get all available formats by [nyuszika7h](https://github.com/nyuszika7h)
* [Bilibili] Fix title extraction by [u-spec-png](https://github.com/u-spec-png)
* [CBC Gem] Fix for shows that don't have all seasons by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
* [curiositystream] Add more metadata
* [CuriosityStream] Fix series
* [DiscoveryPlus] Rewrite extractors by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
* [HotStar] Set language field from tags by [Ashish0804](https://github.com/Ashish0804)
* [instagram, cleanup] Refactor extractors
* [Instagram] Display more login errors by [MinePlayersPE](https://github.com/MinePlayersPE)
* [itv] Fix extractor by [staubichsauger](https://github.com/staubichsauger), [pukkandan](https://github.com/pukkandan)
* [mediaklikk] Expand valid URL
* [MTV] Improve mgid extraction by [Sipherdrakon](https://github.com/Sipherdrakon), [kikuyan](https://github.com/kikuyan)
* [nexx] Better error message for unsupported format
* [NovaEmbed] Fix extractor by [pukkandan](https://github.com/pukkandan), [std-move](https://github.com/std-move)
* [PatreonUser] Do not capture RSS URLs
* [Reddit] Add support for 1080p videos by [xenova](https://github.com/xenova)
* [RoosterTeethSeries] Fix for multiple pages by [MinePlayersPE](https://github.com/MinePlayersPE)
* [sbs] Fix for movies and livestreams
* [Senate.gov] Add SenateGovIE and fix SenateISVPIE by [Grabien](https://github.com/Grabien), [pukkandan](https://github.com/pukkandan)
* [soundcloud:search] Fix pagination
* [tiktok:user] Set `webpage_url` correctly
* [Tokentube] Fix description by [u-spec-png](https://github.com/u-spec-png)
* [trovo] Fix extractor by [nyuszika7h](https://github.com/nyuszika7h)
* [tv2] Expand valid URL
* [Tvplayhome] Fix extractor by [pukkandan](https://github.com/pukkandan), [18928172992817182](https://github.com/18928172992817182)
* [Twitch:vod] Add chapters by [mpeter50](https://github.com/mpeter50)
* [twitch:vod] Extract live status by [DEvmIb](https://github.com/DEvmIb)
* [VidLii] Add 720p support by [mrpapersonic](https://github.com/mrpapersonic)
* [vimeo] Add fallback for config URL
* [vimeo] Sort http formats higher
* [WDR] Expand valid URL
* [willow] Add extractor by [aarubui](https://github.com/aarubui)
* [xvideos] Detect embed URLs by [4a1e2y5](https://github.com/4a1e2y5)
* [xvideos] Fix extractor by [Yakabuff](https://github.com/Yakabuff)
* [youtube, cleanup] Reorganize Tab and Search extractor inheritances
* [youtube:search_url] Add playlist/channel support
* [youtube] Add `default` player client by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Add storyboard formats
* [youtube] Decrypt n-sig for URLs with `ratebypass`
* [youtube] Minor improvement to format sorting
* [cleanup] Add deprecation warnings
* [cleanup] Refactor `JSInterpreter._seperate`
* [Cleanup] Remove some unnecessary groups in regexes by [Ashish0804](https://github.com/Ashish0804)
* [cleanup] Misc cleanup
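
A sketch of the custom format selector mentioned above: `format` may be a callable that receives a `ctx` dict and yields the chosen format dicts. The selection logic and URL are illustrative:

```python
import yt_dlp

def best_audio_only(ctx):
    # Pick the audio-only format with the highest bitrate
    audio = [f for f in ctx['formats'] if f.get('vcodec') == 'none']
    if audio:
        yield max(audio, key=lambda f: f.get('abr') or 0)

with yt_dlp.YoutubeDL({'format': best_audio_only}) as ydl:
    ydl.download(['https://example.com/video'])
```
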
### 2021.11.10.1

* Temporarily disable MacOS Build

### 2021.11.10

* [youtube] **Fix throttling by decrypting n-sig**
* Merging extractors from [haruhi-dl](https://git.sakamoto.pl/laudom/haruhi-dl) by [selfisekai](https://github.com/selfisekai)
    * [extractor] Add `_search_nextjs_data`
    * [tvp] Fix extractors
    * [tvp] Add TVPStreamIE
    * [wppilot] Add extractors
    * [polskieradio] Add extractors
    * [radiokapital] Add extractors
    * [polsatgo] Add extractor by [selfisekai](https://github.com/selfisekai), [sdomi](https://github.com/sdomi)
* Separate `--check-all-formats` from `--check-formats`
* Approximate filesize from bitrate
* Don't create console in `windows_enable_vt_mode`
* Fix bug in `--load-infojson` of playlists
* [minicurses] Add colors to `-F` and standardize color-printing code
* [outtmpl] Add type `link` for internet shortcut files
* [outtmpl] Add alternate forms for `q` and `j`
* [outtmpl] Do not traverse `None`
* [fragment] Fix progress display in fragmented downloads
* [downloader/ffmpeg] Fix vtt download with ffmpeg
* [ffmpeg] Detect presence of setts and libavformat version
* [ExtractAudio] Rescale `--audio-quality` correctly by [CrypticSignal](https://github.com/CrypticSignal), [pukkandan](https://github.com/pukkandan)
* [ExtractAudio] Use `libfdk_aac` if available by [CrypticSignal](https://github.com/CrypticSignal)
* [FormatSort] `eac3` is better than `ac3`
* [FormatSort] Fix some fields' defaults
* [generic] Detect more json_ld
* [generic] Parse jwplayer with only the json URL
* [extractor] Add keyword automatically to SearchIE descriptions
* [extractor] Fix some errors being converted to `ExtractorError`
* [utils] Add `join_nonempty` (see the sketch below this section's list)
* [utils] Add `jwt_decode_hs256` by [Ashish0804](https://github.com/Ashish0804)
* [utils] Create `DownloadCancelled` exception
* [utils] Parse `vp09` as vp9
* [utils] Sanitize URL when determining protocol
* [test/download] Fallback test to `bv`
* [docs] Minor documentation improvements
* [cleanup] Improvements to error and debug messages
* [cleanup] Minor fixes and cleanup
* [3speak] Add extractors by [Ashish0804](https://github.com/Ashish0804)
* [AmazonStore] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [Gab] Add extractor by [u-spec-png](https://github.com/u-spec-png)
* [mediaset] Add playlist support by [nixxo](https://github.com/nixxo)
* [MLSSoccer] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [N1] Add support for nova.rs by [u-spec-png](https://github.com/u-spec-png)
* [PlanetMarathi] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [RaiplayRadio] Add extractors by [frafra](https://github.com/frafra)
* [roosterteeth] Add series extractor
* [sky] Add `SkyNewsStoryIE` by [ajj8](https://github.com/ajj8)
* [youtube] Fix sorting for some videos
* [youtube] Populate `thumbnail` with the best "known" thumbnail
* [youtube] Refactor itag processing
* [youtube] Remove unnecessary no-playlist warning
* [youtube:tab] Add Invidious list for playlists/channels by [rhendric](https://github.com/rhendric)
* [Bilibili:comments] Fix infinite loop by [u-spec-png](https://github.com/u-spec-png)
* [ceskatelevize] Fix extractor by [flashdagger](https://github.com/flashdagger)
* [Coub] Fix media format identification by [wlritchi](https://github.com/wlritchi)
* [crunchyroll] Add extractor-args `language` and `hardsub`
* [DiscoveryPlus] Allow language codes in URL
* [imdb] Fix thumbnail by [ozburo](https://github.com/ozburo)
* [instagram] Add IOS URL support by [u-spec-png](https://github.com/u-spec-png)
* [instagram] Improve login code by [u-spec-png](https://github.com/u-spec-png)
* [Instagram] Improve metadata extraction by [u-spec-png](https://github.com/u-spec-png)
* [iPrima] Fix extractor by [stanoarn](https://github.com/stanoarn)
* [itv] Add support for ITV News by [ajj8](https://github.com/ajj8)
* [la7] Fix extractor by [nixxo](https://github.com/nixxo)
* [linkedin] Don't login multiple times
* [mtv] Fix some videos by [Sipherdrakon](https://github.com/Sipherdrakon)
* [Newgrounds] Fix description by [u-spec-png](https://github.com/u-spec-png)
* [Nrk] Minor fixes by [fractalf](https://github.com/fractalf)
* [Olympics] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [piksel] Fix sorting
* [twitter] Do not sort by codec
* [viewlift] Add cookie-based login and series support by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
* [vimeo] Detect source extension and misc cleanup by [flashdagger](https://github.com/flashdagger)
* [vimeo] Fix ondemand videos and direct URLs with hash
* [vk] Fix login and add subtitles by [kaz-us](https://github.com/kaz-us)
* [VLive] Add upload_date and thumbnail by [Ashish0804](https://github.com/Ashish0804)
* [VRT] Fix login by [pgaig](https://github.com/pgaig)
* [Vupload] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [wakanim] Add support for MPD manifests by [nyuszika7h](https://github.com/nyuszika7h)
* [wakanim] Detect geo-restriction by [nyuszika7h](https://github.com/nyuszika7h)
* [ZenYandex] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
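
A tiny sketch of the new `join_nonempty` helper referenced above; it concatenates only the truthy values:

```python
from yt_dlp.utils import join_nonempty

# None and '' are skipped; remaining values are joined with the delimiter
print(join_nonempty('1080p', None, '', 'en', delim='-'))  # '1080p-en'
```
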
### 2021.10.22

* [build] Improvements
    * Build standalone MacOS packages by [smplayer-dev](https://github.com/smplayer-dev)
    * Release windows exe built with `py2exe`
    * Enable lazy-extractors in releases
        * Set env var `YTDLP_NO_LAZY_EXTRACTORS` to forcefully disable this (experimental)
    * Clean up error reporting in update
    * Refactor `pyinst.py`, misc cleanup and improve docs
* [docs] Migrate issues to use forms by [Ashish0804](https://github.com/Ashish0804)
* [downloader] **Fix slow progress hooks**
    * This was causing HLS/DASH downloads to be extremely slow in some situations
* [downloader/ffmpeg] Improve simultaneous download and merge
* [EmbedMetadata] Allow overwriting all default metadata with `meta_default` key
* [ModifyChapters] Add ability for `--remove-chapters` to remove sections by timestamp
* [utils] Allow duration strings in `--match-filter` (see the sketch below this section's list)
* Add HDR information to formats
* Add negative option `--no-batch-file` by [Zirro](https://github.com/Zirro)
* Calculate more fields for merged formats
* Do not verify thumbnail URLs unless `--check-formats` is specified
* Don't create console for subprocesses on Windows
* Fix `--restrict-filename` when used with default template
* Fix `check_formats` output being written to stdout when `-qv`
* Fix bug in storyboards
* Fix conflict b/w id and ext in format selection
* Fix verbose head not showing custom configs
* Load archive only after printing verbose head
* Make `duration_string` and `resolution` available in `--match-filter`
* Re-implement deprecated option `--id`
* Reduce default `--socket-timeout`
* Write verbose header to logger
* [outtmpl] Fix bug in expanding environment variables
* [cookies] Local State should be opened as utf-8
* [extractor,utils] Detect more codecs/mimetypes
* [extractor] Detect `EXT-X-KEY` Apple FairPlay
* [utils] Use `importlib` to load plugins by [sulyi](https://github.com/sulyi)
* [http] Retry on socket timeout and show the last encountered error
* [fragment] Print error message when skipping fragment
* [aria2c] Fix `--skip-unavailable-fragment`
* [SponsorBlock] Obey `extractor-retries` and `sleep-requests`
* [Merger] Do not add `aac_adtstoasc` to non-hls audio
* [ModifyChapters] Do not mutate original chapters by [nihil-admirari](https://github.com/nihil-admirari)
* [devscripts/run_tests] Use markers to filter tests by [sulyi](https://github.com/sulyi)
* [7plus] Add cookie based authentication by [nyuszika7h](https://github.com/nyuszika7h)
* [AdobePass] Fix RCN MSO by [jfogelman](https://github.com/jfogelman)
* [CBC] Fix Gem livestream by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
* [CBC] Support CBC Gem member content by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
* [crunchyroll] Add season to flat-playlist
* [crunchyroll] Add support for `beta.crunchyroll` URLs and fix series URLs with language code
* [EUScreen] Add Extractor by [Ashish0804](https://github.com/Ashish0804)
* [Gronkh] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [hidive] Fix typo
* [Hotstar] Mention Dynamic Range in `format_id` by [Ashish0804](https://github.com/Ashish0804)
* [Hotstar] Raise appropriate error for DRM
* [instagram] Add login by [u-spec-png](https://github.com/u-spec-png)
* [instagram] Show appropriate error when login is needed
* [microsoftstream] Add extractor by [damianoamatruda](https://github.com/damianoamatruda), [nixklai](https://github.com/nixklai)
* [on24] Add extractor by [damianoamatruda](https://github.com/damianoamatruda)
* [patreon] Fix vimeo player regex by [zenerdi0de](https://github.com/zenerdi0de)
* [SkyNewsAU] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [tagesschau] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [tbs] Add tbs live streams by [llacb47](https://github.com/llacb47)
* [tiktok] Fix typo and update tests
* [trovo] Support channel clips and VODs by [Ashish0804](https://github.com/Ashish0804)
* [Viafree] Add support for Finland by [18928172992817182](https://github.com/18928172992817182)
* [vimeo] Fix embedded `player.vimeo`
* [vlive:channel] Fix extraction by [kikuyan](https://github.com/kikuyan), [pukkandan](https://github.com/pukkandan)
* [youtube] Add auto-translated subtitles
* [youtube] Expose different formats with same itag
* [youtube:comments] Fix for new layout by [coletdjnz](https://github.com/coletdjnz)
* [cleanup] Cleanup bilibili code by [pukkandan](https://github.com/pukkandan), [u-spec-png](https://github.com/u-spec-png)
* [cleanup] Remove broken youtube login code
* [cleanup] Standardize timestamp formatting code
* [cleanup] Generalize `getcomments` implementation for extractors
* [cleanup] Simplify search extractors code
* [cleanup] misc
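
Duration strings in `--match-filter` can be sketched through the API as well, assuming `match_filter_func` accepts the same filter syntax as the CLI; the expression and URL are illustrative:

```python
import yt_dlp
from yt_dlp.utils import match_filter_func

# Hedged sketch: skip videos of 5 minutes or less using a duration string
opts = {'match_filter': match_filter_func('duration > 00:05:00')}
with yt_dlp.YoutubeDL(opts) as ydl:
    ydl.download(['https://example.com/playlist'])
```
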
### 2021.10.10

* [downloader/ffmpeg] Fix bug in initializing `FFmpegPostProcessor`
* [minicurses] Fix when printing to file
* [downloader] Fix throttledratelimit
* [francetv] Fix extractor by [fstirlitz](https://github.com/fstirlitz), [sarnoud](https://github.com/sarnoud)
* [NovaPlay] Add extractor by [Bojidarist](https://github.com/Bojidarist)
* [ffmpeg] Revert "Set max probesize" - No longer needed
* [docs] Remove incorrect dependency on VC++10
* [build] Allow releasing without changelog

### 2021.10.09

* Improved progress reporting
    * Separate `--console-title` and `--no-progress`
    * Add option `--progress` to show progress-bar even in quiet mode
    * Fix and refactor `minicurses` and use it for all progress reporting
    * Standardize use of terminal sequences and enable color support for windows 10
    * Add option `--progress-template` to customize progress-bar and console-title

### 2021.10.10

* [downloader/ffmpeg] Fix bug in initializing `FFmpegPostProcessor`
* [minicurses] Fix when printing to file
* [downloader] Fix throttledratelimit
* [francetv] Fix extractor by [fstirlitz](https://github.com/fstirlitz), [sarnoud](https://github.com/sarnoud)
* [NovaPlay] Add extractor by [Bojidarist](https://github.com/Bojidarist)
* [ffmpeg] Revert "Set max probesize" - No longer needed
* [docs] Remove incorrect dependency on VC++10
* [build] Allow to release without changelog

### 2021.10.09

* Improved progress reporting
    * Separate `--console-title` and `--no-progress`
    * Add option `--progress` to show progress-bar even in quiet mode
    * Fix and refactor `minicurses` and use it for all progress reporting
    * Standardize use of terminal sequences and enable color support for windows 10
    * Add option `--progress-template` to customize progress-bar and console-title (example below)
    * Add postprocessor hooks and progress reporting
* [postprocessor] Add plugin support with option `--use-postprocessor`
* [extractor] Extract storyboards from SMIL manifests by [fstirlitz](https://github.com/fstirlitz)
* [outtmpl] Alternate form of format type `l` for `\n` delimited list
* [outtmpl] Format type `U` for unicode normalization
* [outtmpl] Allow empty output template to skip a type of file
* Merge webm formats into mkv if thumbnails are to be embedded
* [adobepass] Add RCN as MSO by [jfogelman](https://github.com/jfogelman)
* [ciscowebex] Add extractor by [damianoamatruda](https://github.com/damianoamatruda)
* [Gettr] Add extractor by [i6t](https://github.com/i6t)
* [GoPro] Add extractor by [i6t](https://github.com/i6t)
* [N1] Add extractor by [u-spec-png](https://github.com/u-spec-png)
* [Theta] Add video extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [Veo] Add extractor by [i6t](https://github.com/i6t)
* [Vupload] Add extractor by [u-spec-png](https://github.com/u-spec-png)
* [bbc] Extract better quality videos by [ajj8](https://github.com/ajj8)
* [Bilibili] Add subtitle converter by [u-spec-png](https://github.com/u-spec-png)
* [CBC] Cleanup tests by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
* [Douyin] Rewrite extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
* [Funimation] Fix for /v/ urls by [pukkandan](https://github.com/pukkandan), [Jules-A](https://github.com/Jules-A)
* [Funimation] Sort formats according to the relevant extractor-args
* [Hidive] Fix duplicate and incorrect formats
* [HotStarSeries] Fix cookies by [Ashish0804](https://github.com/Ashish0804)
* [LinkedInLearning] Add subtitles by [Ashish0804](https://github.com/Ashish0804)
* [Mediaite] Relax valid url by [coletdjnz](https://github.com/coletdjnz)
* [Newgrounds] Add age_limit and fix duration by [u-spec-png](https://github.com/u-spec-png)
* [Newgrounds] Fix view count on songs by [u-spec-png](https://github.com/u-spec-png)
* [parliamentlive.tv] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [PolskieRadio] Fix extractors by [jakubadamw](https://github.com/jakubadamw), [u-spec-png](https://github.com/u-spec-png)
* [reddit] Add embedded url by [u-spec-png](https://github.com/u-spec-png)
* [reddit] Fix 429 by generating a random `reddit_session` by [AjaxGb](https://github.com/AjaxGb)
* [Rumble] Add RumbleChannelIE by [Ashish0804](https://github.com/Ashish0804)
* [soundcloud:playlist] Detect last page correctly
* [SovietsCloset] Add duration from m3u8 by [ChillingPepper](https://github.com/ChillingPepper)
* [Streamable] Add codecs by [u-spec-png](https://github.com/u-spec-png)
* [vidme] Remove extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [youtube:tab] Fallback to API when webpage fails to download by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Fix non-fatal errors in fetching player
* Fix `--flat-playlist` when neither IE nor id is known
* Fix `-f mp4` behaving differently from youtube-dl
* Workaround for bug in `ssl.SSLContext.load_default_certs`
* [aes] Improve performance slightly by [sulyi](https://github.com/sulyi)
* [cookies] Fix keyring fallback by [mbway](https://github.com/mbway)
* [embedsubtitle] Fix error when duration is unknown
* [ffmpeg] Fix error when subtitle file is missing
* [ffmpeg] Set max probesize to workaround AAC HLS stream issues by [shirt](https://github.com/shirt-dev)
* [FixupM3u8] Remove redundant run if merged is needed
* [hls] Fix decryption issues by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan)
* [http] Respect user-provided chunk size over extractor's
* [utils] Let traverse_obj accept functions as keys
* [docs] Add note about our custom ffmpeg builds
* [docs] Write embedding and contributing documentation by [pukkandan](https://github.com/pukkandan), [timethrow](https://github.com/timethrow)
* [update] Check for new version even if not updateable
* [build] Add more files to the tarball
* [build] Allow building with py2exe (and misc fixes)
* [build] Use pycryptodomex by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan)
* [cleanup] Some minor refactoring, improve docs and misc cleanup
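
A sketch of the new progress options above; the `info.`/`progress.` field prefixes and the `download-title:` type follow the `--progress-template` documentation, and the URL is a placeholder:

```sh
# Show a custom console title while downloading
yt-dlp --console-title \
    --progress-template "download-title:%(info.id)s - ETA %(progress.eta)s" \
    "https://example.com/watch?v=xxx"
```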

### 2021.09.25

* Add new option `--netrc-location`
* [outtmpl] Allow alternate fields using `,` (example below)
* [outtmpl] Add format type `B` to treat the value as bytes, e.g. to limit the filename to a certain number of bytes
* Separate the options `--ignore-errors` and `--no-abort-on-error`
* Basic framework for simultaneous download of multiple formats by [nao20010128nao](https://github.com/nao20010128nao)
* [17live] Add 17.live extractor by [nao20010128nao](https://github.com/nao20010128nao)
* [bilibili] Add BiliIntlIE and BiliIntlSeriesIE by [Ashish0804](https://github.com/Ashish0804)
* [CAM4] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [Chingari] Add extractors by [Ashish0804](https://github.com/Ashish0804)
* [CGTN] Add extractor by [chao813](https://github.com/chao813)
* [damtomo] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
* [gotostage] Add extractor by [poschi3](https://github.com/poschi3)
* [Koo] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [Mediaite] Add Extractor by [Ashish0804](https://github.com/Ashish0804)
* [Mediaklikk] Add Extractor by [tmarki](https://github.com/tmarki), [mrx23dot](https://github.com/mrx23dot), [coletdjnz](https://github.com/coletdjnz)
* [MuseScore] Add Extractor by [Ashish0804](https://github.com/Ashish0804)
* [Newgrounds] Add NewgroundsUserIE and improve extractor by [u-spec-png](https://github.com/u-spec-png)
* [nzherald] Add NZHeraldIE by [coletdjnz](https://github.com/coletdjnz)
* [Olympics] Add replay extractor by [Ashish0804](https://github.com/Ashish0804)
* [Peertube] Add channel and playlist extractors by [u-spec-png](https://github.com/u-spec-png)
* [radlive] Add extractor by [nyuszika7h](https://github.com/nyuszika7h)
* [SovietsCloset] Add extractor by [ChillingPepper](https://github.com/ChillingPepper)
* [Streamanity] Add Extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [Theta] Add extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [Yandex] Add ZenYandexIE and ZenYandexChannelIE by [Ashish0804](https://github.com/Ashish0804)
* [9Now] handle episodes of series by [dalanmiller](https://github.com/dalanmiller)
* [AnimalPlanet] Fix extractor by [Sipherdrakon](https://github.com/Sipherdrakon)
* [Arte] Improve description extraction by [renalid](https://github.com/renalid)
* [atv.at] Use jwt for API by [NeroBurner](https://github.com/NeroBurner)
* [brightcove] Extract subtitles from manifests
* [CBC] Fix CBC Gem extractors by [makeworld-the-better-one](https://github.com/makeworld-the-better-one)
* [cbs] Report appropriate error for DRM
* [comedycentral] Support `collection-playlist` by [nixxo](https://github.com/nixxo)
* [DIYNetwork] Support new format by [Sipherdrakon](https://github.com/Sipherdrakon)
* [downloader/niconico] Pass custom headers by [nao20010128nao](https://github.com/nao20010128nao)
* [dw] Fix extractor
* [Fancode] Fix live streams by [zenerdi0de](https://github.com/zenerdi0de)
* [funimation] Fix for locations outside US by [Jules-A](https://github.com/Jules-A), [pukkandan](https://github.com/pukkandan)
* [globo] Fix GloboIE by [Ashish0804](https://github.com/Ashish0804)
* [HiDive] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
* [Hotstar] Add referer for subs by [Ashish0804](https://github.com/Ashish0804)
* [itv] Fix extractor, add subtitles and thumbnails by [coletdjnz](https://github.com/coletdjnz), [sleaux-meaux](https://github.com/sleaux-meaux), [Vangelis66](https://github.com/Vangelis66)
* [lbry] Show error message from API response
* [Mxplayer] Use mobile API by [Ashish0804](https://github.com/Ashish0804)
* [NDR] Rewrite NDRIE by [Ashish0804](https://github.com/Ashish0804)
* [Nuvid] Fix extractor by [u-spec-png](https://github.com/u-spec-png)
* [Oreilly] Handle new web url by [MKSherbini](https://github.com/MKSherbini)
* [pbs] Fix subtitle extraction by [coletdjnz](https://github.com/coletdjnz), [gesa](https://github.com/gesa), [raphaeldore](https://github.com/raphaeldore)
* [peertube] Update instances by [u-spec-png](https://github.com/u-spec-png)
* [plutotv] Fix extractor for URLs with `/en`
* [reddit] Workaround for 429 by redirecting to old.reddit.com
* [redtube] Fix exts
* [soundcloud] Make playlist extraction lazy
* [soundcloud] Retry playlist pages on `502` error and update `_CLIENT_ID`
* [southpark] Fix SouthParkDE by [coletdjnz](https://github.com/coletdjnz)
* [SovietsCloset] Fix playlists for games with only named categories by [ConquerorDopy](https://github.com/ConquerorDopy)
* [SpankBang] Fix uploader by [f4pp3rk1ng](https://github.com/f4pp3rk1ng), [coletdjnz](https://github.com/coletdjnz)
* [tiktok] Use API to fetch higher quality video by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47)
* [TikTokUser] Fix extractor using mobile API by [MinePlayersPE](https://github.com/MinePlayersPE), [llacb47](https://github.com/llacb47)
* [videa] Fix some extraction errors by [nyuszika7h](https://github.com/nyuszika7h)
* [VrtNU] Handle login errors by [llacb47](https://github.com/llacb47)
* [vrv] Don't raise error when thumbnails are missing
* [youtube] Cleanup authentication code by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Fix `--mark-watched` with `--cookies-from-browser`
* [youtube] Improvements to JS player extraction and add extractor-args to skip it by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Retry on 'Unknown Error' by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Return full URL instead of just ID
* [youtube] Warn when trying to download clips
* [zdf] Improve format sorting
* [zype] Extract subtitles from the m3u8 manifest by [fstirlitz](https://github.com/fstirlitz)
* Allow `--force-write-archive` to work with `--flat-playlist`
* Download subtitles in order of `--sub-langs`
* Allow `0` in `--playlist-items`
* Handle more playlist errors with `-i`
* Fix `--no-get-comments`
* Fix `extra_info` being reused across runs
* Fix compat options `no-direct-merge` and `playlist-index`
* Dump files should obey `--trim-filename` by [sulyi](https://github.com/sulyi)
* [aes] Add `aes_gcm_decrypt_and_verify` by [sulyi](https://github.com/sulyi), [pukkandan](https://github.com/pukkandan)
* [aria2c] Fix IV for some AES-128 streams by [shirt](https://github.com/shirt-dev)
* [compat] Don't ignore `HOME` (if set) on windows
* [cookies] Make browser names case insensitive
* [cookies] Print warning for cookie decoding error only once
* [extractor] Fix root-relative URLs in MPD by [DigitalDJ](https://github.com/DigitalDJ)
* [ffmpeg] Add `aac_adtstoasc` when merging if needed
* [fragment,aria2c] Generalize and refactor some code
* [fragment] Avoid repeated request for AES key
* [fragment] Fix range header when using `-N` and media sequence by [shirt](https://github.com/shirt-dev)
* [hls,aes] Fallback to native implementation for AES-CBC and detect `Cryptodome` in addition to `Crypto`
* [hls] Byterange + AES128 is supported by native downloader
* [ModifyChapters] Improve sponsor chapter merge algorithm by [nihil-admirari](https://github.com/nihil-admirari)
* [ModifyChapters] Minor fixes
* [WebVTT] Adjust parser to accommodate PBS subtitles
* [utils] Improve `extract_timezone` by [dirkf](https://github.com/dirkf)
* [options] Fix `--no-config` and refactor reading of config files
* [options] Strip spaces and ignore empty entries in list-like switches
* [test/cookies] Improve logging
* [build] Automate more of the release process by [animelover1984](https://github.com/animelover1984), [pukkandan](https://github.com/pukkandan)
* [build] Fix sha256 by [nihil-admirari](https://github.com/nihil-admirari)
* [build] Bring back brew taps by [nao20010128nao](https://github.com/nao20010128nao)
* [build] Provide `--onedir` zip for windows
* [cleanup,docs] Add deprecation warning in docs for some counter intuitive behaviour
* [cleanup] Fix line endings for `nebula.py` by [glenn-slayden](https://github.com/glenn-slayden)
* [cleanup] Improve `make clean-test` by [sulyi](https://github.com/sulyi)
* [cleanup] Misc
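
For example, the new `,` alternate fields and the `B` format type from this release can be combined in one output template (illustrative only; the precision form `.200B` assumes the documented byte-limit syntax):

```sh
# Fall back from artist to creator to uploader, and cap the title at 200 bytes
yt-dlp -o "%(artist,creator,uploader)s - %(title).200B.%(ext)s" "https://example.com/watch?v=xxx"
```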

### 2021.09.02

* **Native SponsorBlock** implementation by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan) (example below)
    * `--sponsorblock-remove CATS` removes specified chapters from file
    * `--sponsorblock-mark CATS` marks the specified sponsor sections as chapters
    * `--sponsorblock-chapter-title TMPL` to specify sponsor chapter template
    * `--sponsorblock-api URL` to use a different API
    * No re-encoding is done unless `--force-keyframes-at-cuts` is used
    * The fetched sponsor sections are written to the infojson
    * Deprecates: `--sponskrub`, `--no-sponskrub`, `--sponskrub-cut`, `--no-sponskrub-cut`, `--sponskrub-force`, `--no-sponskrub-force`, `--sponskrub-location`, `--sponskrub-args`
* Split `--embed-chapters` from `--embed-metadata` (it still implies the former by default)
* Add option `--remove-chapters` to remove arbitrary chapters by [nihil-admirari](https://github.com/nihil-admirari), [pukkandan](https://github.com/pukkandan)
* Add option `--force-keyframes-at-cuts` for more accurate cuts when removing and splitting chapters by [nihil-admirari](https://github.com/nihil-admirari)
* Let `--match-filter` reject entries early
    * Makes redundant: `--match-title`, `--reject-title`, `--min-views`, `--max-views`
* [lazy_extractor] Improvements (It now passes all tests)
    * Bugfix for when plugin directory doesn't exist by [kidonng](https://github.com/kidonng)
    * Create instance only after pre-checking archive
    * Import actual class if an attribute is accessed
    * Fix `suitable` and add flake8 test
* [downloader/ffmpeg] Experimental support for DASH manifests (including live)
    * Your ffmpeg must have [this patch](https://github.com/FFmpeg/FFmpeg/commit/3249c757aed678780e22e99a1a49f4672851bca9) applied for YouTube DASH to work
* [downloader/ffmpeg] Allow passing custom arguments before `-i`
* [BannedVideo] Add extractor by [smege1001](https://github.com/smege1001), [blackjack4494](https://github.com/blackjack4494), [pukkandan](https://github.com/pukkandan)
* [bilibili] Add category extractor by [animelover1984](https://github.com/animelover1984)
* [Epicon] Add extractors by [Ashish0804](https://github.com/Ashish0804)
* [filmmodu] Add extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
* [GabTV] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [Hungama] Fix `HungamaSongIE` and add `HungamaAlbumPlaylistIE` by [Ashish0804](https://github.com/Ashish0804)
* [ManotoTV] Add new extractors by [tandy1000](https://github.com/tandy1000)
* [Niconico] Add Search extractors by [animelover1984](https://github.com/animelover1984), [pukkandan](https://github.com/pukkandan)
* [Patreon] Add `PatreonUserIE` by [zenerdi0de](https://github.com/zenerdi0de)
* [peloton] Add extractor by [IONECarter](https://github.com/IONECarter), [capntrips](https://github.com/capntrips), [pukkandan](https://github.com/pukkandan)
* [ProjectVeritas] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [radiko] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
* [StarTV] Add extractor for `startv.com.tr` by [mrfade](https://github.com/mrfade), [coletdjnz](https://github.com/coletdjnz)
* [tiktok] Add `TikTokUserIE` by [Ashish0804](https://github.com/Ashish0804), [pukkandan](https://github.com/pukkandan)
* [Tokentube] Add extractor by [u-spec-png](https://github.com/u-spec-png)
* [TV2Hu] Fix `TV2HuIE` and add `TV2HuSeriesIE` by [Ashish0804](https://github.com/Ashish0804)
* [voicy] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
* [adobepass] Fix Verizon SAML login by [nyuszika7h](https://github.com/nyuszika7h), [ParadoxGBB](https://github.com/ParadoxGBB)
* [afreecatv] Fix adult VODs by [wlritchi](https://github.com/wlritchi)
* [afreecatv] Tolerate failure to parse date string by [wlritchi](https://github.com/wlritchi)
* [aljazeera] Fix extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
* [ATV.at] Fix extractor for ATV.at by [NeroBurner](https://github.com/NeroBurner), [coletdjnz](https://github.com/coletdjnz)
* [bitchute] Fix test by [mahanstreamer](https://github.com/mahanstreamer)
* [camtube] Remove obsolete extractor by [alerikaisattera](https://github.com/alerikaisattera)
* [CDA] Add more formats by [u-spec-png](https://github.com/u-spec-png)
* [eroprofile] Fix page skipping in albums by [jhwgh1968](https://github.com/jhwgh1968)
* [facebook] Fix format sorting
* [facebook] Fix metadata extraction by [kikuyan](https://github.com/kikuyan)
* [facebook] Update onion URL by [Derkades](https://github.com/Derkades)
* [HearThisAtIE] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
* [instagram] Add referrer to prevent throttling by [u-spec-png](https://github.com/u-spec-png), [kikuyan](https://github.com/kikuyan)
* [iwara.tv] Extract more metadata by [BunnyHelp](https://github.com/BunnyHelp)
* [iwara] Add thumbnail by [i6t](https://github.com/i6t)
* [kakao] Fix extractor
* [mediaset] Fix extraction for some videos by [nyuszika7h](https://github.com/nyuszika7h)
* [Motherless] Fix extractor by [coletdjnz](https://github.com/coletdjnz)
* [Nova] fix extractor by [std-move](https://github.com/std-move)
* [ParamountPlus] Fix geo verification by [shirt](https://github.com/shirt-dev)
* [peertube] handle new video URL format by [Chocobozzz](https://github.com/Chocobozzz)
* [pornhub] Separate and fix playlist extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
* [reddit] Fix for quarantined subreddits by [ouwou](https://github.com/ouwou)
* [ShemarooMe] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
* [soundcloud] Refetch `client_id` on 403
* [tiktok] Fix metadata extraction
* [TV2] Fix extractor by [Ashish0804](https://github.com/Ashish0804)
* [tv5mondeplus] Fix extractor by [korli](https://github.com/korli)
* [VH1,TVLand] Fix extractors by [Sipherdrakon](https://github.com/Sipherdrakon)
* [Viafree] Fix extractor and extract subtitles by [coletdjnz](https://github.com/coletdjnz)
* [XHamster] Extract `uploader_id` by [octotherp](https://github.com/octotherp)
* [youtube] Add `shorts` to `_VALID_URL`
* [youtube] Add av01 itags to known formats list by [blackjack4494](https://github.com/blackjack4494)
* [youtube] Extract error messages from HTTPError response by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Fix subtitle names
* [youtube] Prefer audio stream that YouTube considers default
* [youtube] Remove annotations and deprecate `--write-annotations` by [coletdjnz](https://github.com/coletdjnz)
* [Zee5] Fix extractor and add subtitles by [Ashish0804](https://github.com/Ashish0804)
* [aria2c] Obey `--rate-limit`
* [EmbedSubtitle] Continue even if some files are missing
* [extractor] Better error message for DRM
* [extractor] Common function `_match_valid_url`
* [extractor] Show video id in error messages if possible
* [FormatSort] Remove priority of `lang`
* [options] Add `_set_from_options_callback`
* [SubtitleConvertor] Fix bug during subtitle conversion
* [utils] Add `parse_qs`
* [webvtt] Fix timestamp overflow adjustment by [fstirlitz](https://github.com/fstirlitz)
* Bugfix for `--replace-in-metadata`
* Don't try to merge with final extension
* Fix `--force-overwrites` when using `-k`
* Fix `--no-prefer-free-formats` by [CeruleanSky](https://github.com/CeruleanSky)
* Fix `-F` for extractors that directly return url
* Fix `-J` when there are failed videos
* Fix `extra_info` being reused across runs
* Fix `playlist_index` not obeying `playlist_start` and add tests
* Fix resuming of single formats when using `--no-part`
* Revert erroneous use of the `Content-Length` header by [fstirlitz](https://github.com/fstirlitz)
* Use `os.replace` where applicable by [paulwrubel](https://github.com/paulwrubel)
* [build] Add homebrew taps `yt-dlp/taps/yt-dlp` by [nao20010128nao](https://github.com/nao20010128nao)
* [build] Fix bug in making `yt-dlp.tar.gz`
* [docs] Fix some typos by [pukkandan](https://github.com/pukkandan), [zootedb0t](https://github.com/zootedb0t)
* [cleanup] Replace improper use of tab in trovo by [glenn-slayden](https://github.com/glenn-slayden)
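
A representative SponsorBlock invocation using the options listed at the top of this section (category names follow the SponsorBlock API; the URL is a placeholder):

```sh
# Remove sponsor/self-promo segments and mark intros/outros as chapters
yt-dlp --sponsorblock-remove sponsor,selfpromo \
    --sponsorblock-mark intro,outro \
    --force-keyframes-at-cuts \
    "https://example.com/watch?v=xxx"
```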

### 2021.08.10

* Add option `--replace-in-metadata` (example below)
* Add option `--no-simulate` to not simulate even when `--print` or `--list...` are used
    * Deprecates `--print-json`
* Allow entire infodict to be printed using `%()s` - makes `--dump-json` redundant
* Allow multiple `--exec` and `--exec-before-download`
* Add regex to `--match-filter`
* Add all format filtering operators also to `--match-filter` by [max-te](https://github.com/max-te)
* Add compat-option `no-keep-subs`
* [adobepass] Add MSO Cablevision by [Jessecar96](https://github.com/Jessecar96)
* [BandCamp] Add BandcampMusicIE by [Ashish0804](https://github.com/Ashish0804)
* [blackboardcollaborate] Add new extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
* [eroprofile] Add album downloader by [jhwgh1968](https://github.com/jhwgh1968)
* [mirrativ] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
* [openrec] Add extractors by [nao20010128nao](https://github.com/nao20010128nao)
* [nbcolympics:stream] Fix extractor by [nchilada](https://github.com/nchilada), [pukkandan](https://github.com/pukkandan)
* [nbcolympics] Update extractor for 2020 olympics by [wesnm](https://github.com/wesnm)
* [paramountplus] Separate extractor and fix some titles by [shirt](https://github.com/shirt-dev), [pukkandan](https://github.com/pukkandan)
* [RCTIPlus] Support events and TV by [MinePlayersPE](https://github.com/MinePlayersPE)
* [Newgrounds] Improve extractor and fix playlist by [u-spec-png](https://github.com/u-spec-png)
* [aenetworks] Update `_THEPLATFORM_KEY` and `_THEPLATFORM_SECRET` by [wesnm](https://github.com/wesnm)
* [crunchyroll] Fix thumbnail by [funniray](https://github.com/funniray)
* [HotStar] Use API for metadata and extract subtitles by [Ashish0804](https://github.com/Ashish0804)
* [instagram] Fix comments extraction by [u-spec-png](https://github.com/u-spec-png)
* [peertube] Fix videos without description by [u-spec-png](https://github.com/u-spec-png)
* [twitch:clips] Extract `display_id` by [dirkf](https://github.com/dirkf)
* [viki] Print error message from API request
* [Vine] Remove invalid formats by [u-spec-png](https://github.com/u-spec-png)
* [VrtNU] Fix XSRF token by [pgaig](https://github.com/pgaig)
* [vrv] Fix thumbnail extraction by [funniray](https://github.com/funniray)
* [youtube] Add extractor-arg `include-live-dash` to show live dash formats
* [youtube] Improve signature function detection by [PSlava](https://github.com/PSlava)
* [youtube] Raise appropriate error when API pages can't be downloaded
* Ensure `_write_ytdl_file` closes file handle on error
* Fix `--compat-options filename` by [stdedos](https://github.com/stdedos)
* Fix issues with infodict sanitization
* Fix resuming when using `--no-part`
* Fix wrong extension for intermediate files
* Handle `BrokenPipeError` by [kikuyan](https://github.com/kikuyan)
* Show libraries present in verbose head
* [extractor] Detect `sttp` as subtitles in MPD by [fstirlitz](https://github.com/fstirlitz)
* [extractor] Reset non-repeating warnings per video
* [ffmpeg] Fix streaming `mp4` to `stdout`
* [ffmpeg] Allow `--ffmpeg-location` to be a file with different name
* [utils] Fix `InAdvancePagedList.__getitem__`
* [utils] Fix `traverse_obj` depth when `is_user_input`
* [webvtt] Merge daisy-chained duplicate cues by [fstirlitz](https://github.com/fstirlitz)
* [build] Use custom build of `pyinstaller` by [shirt](https://github.com/shirt-dev)
* [tests:download] Add batch testing for extractors (`test_YourExtractor_all`)
* [docs] Document which fields `--add-metadata` adds to the file
* [docs] Fix some mistakes and improve doc
* [cleanup] Misc code cleanup
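
To sketch the two headline additions above (the regex, fields and URL are illustrative placeholders):

```sh
# Replace spaces and underscores with "-" in title and uploader
yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-" "https://example.com/watch?v=xxx"

# Print the whole infodict without downloading, making --dump-json redundant
yt-dlp --print "%()s" "https://example.com/watch?v=xxx"
```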

### 2021.08.02

* Add logo, banner and donate links
* [outtmpl] Expand and escape environment variables
* [outtmpl] Add format types `j` (json), `l` (comma delimited list), `q` (quoted for terminal) (example below)
* [downloader] Allow streaming some unmerged formats to stdout using ffmpeg
* [youtube] **Age-gate bypass**
    * Add `agegate` clients by [pukkandan](https://github.com/pukkandan), [MinePlayersPE](https://github.com/MinePlayersPE)
    * Add `thirdParty` to agegate clients to bypass more videos
    * Simplify client definitions, expose `embedded` clients
    * Improve age-gate detection by [coletdjnz](https://github.com/coletdjnz)
    * Fix default global API key by [coletdjnz](https://github.com/coletdjnz)
    * Add `creator` clients for age-gate bypass using unverified accounts by [zerodytrash](https://github.com/zerodytrash), [coletdjnz](https://github.com/coletdjnz), [pukkandan](https://github.com/pukkandan)
* [adobepass] Add MSO Sling TV by [wesnm](https://github.com/wesnm)
* [CBS] Add ParamountPlusSeriesIE by [Ashish0804](https://github.com/Ashish0804)
* [dplay] Add `ScienceChannelIE` by [Sipherdrakon](https://github.com/Sipherdrakon)
* [UtreonIE] Add extractor by [Ashish0804](https://github.com/Ashish0804)
* [youtube] Add `mweb` client by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Add `player_client=all`
* [youtube] Force `hl=en` for comments by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Fix format sorting when using alternate clients
* [youtube] Misc cleanup by [pukkandan](https://github.com/pukkandan), [coletdjnz](https://github.com/coletdjnz)
* [youtube] Extract SAPISID only once
* [CBS] Add fallback by [llacb47](https://github.com/llacb47), [pukkandan](https://github.com/pukkandan)
* [Hotstar] Support cookies by [Ashish0804](https://github.com/Ashish0804)
* [HotStarSeriesIE] Fix regex by [Ashish0804](https://github.com/Ashish0804)
* [bilibili] Improve `_VALID_URL`
* [mediaset] Fix extraction by [nixxo](https://github.com/nixxo)
* [Mxplayer] Add h265 formats by [Ashish0804](https://github.com/Ashish0804)
* [RCTIPlus] Remove PhantomJS dependency by [MinePlayersPE](https://github.com/MinePlayersPE)
* [tenplay] Add MA15+ age limit by [pento](https://github.com/pento)
* [vidio] Fix login error detection by [MinePlayersPE](https://github.com/MinePlayersPE)
* [vimeo] Better extraction of original file by [Ashish0804](https://github.com/Ashish0804)
* [generic] Support KVS player (replaces ThisVidIE) by [rigstot](https://github.com/rigstot)
* Add compat-option `no-clean-infojson`
* Remove `asr` appearing twice in `-F`
* Set `home:` as the default key for `-P`
* [utils] Fix slicing of reversed `LazyList`
* [FormatSort] Fix bug for audio with unknown codec
* [test:download] Support testing with `ignore_no_formats_error`
* [cleanup] Refactor some code
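
The new output-template format types in action (illustrative; any template field works with `--print`):

```sh
# j = JSON, l = comma-delimited list, q = quoted for the terminal
yt-dlp --print "%(tags)l" --print "%(title)q" "https://example.com/watch?v=xxx"
```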

### 2021.07.24

* [youtube:tab] Extract video duration early
* [downloader] Pass `info_dict` to `progress_hook`s
* [youtube] Fix age-gated videos for API clients when cookies are supplied by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Disable `get_video_info` age-gate workaround
    * This endpoint seems to be completely dead
* [youtube] Try all clients even if age-gated
* [youtube] Fix subtitles only being extracted from the first client
* [youtube] Simplify `_get_text`
* [cookies] bugfix for microsoft edge on macOS
* [cookies] Handle `sqlite` `ImportError` gracefully by [mbway](https://github.com/mbway)
* [cookies] Handle errors when importing `keyring`

### 2021.07.21

* **Add option `--cookies-from-browser`** to load cookies from a browser by [mbway](https://github.com/mbway) (example below)
    * Usage: `--cookies-from-browser BROWSER[:PROFILE_NAME_OR_PATH]`
    * Also added `--no-cookies-from-browser`
    * To decrypt chromium cookies, `keyring` is needed for UNIX and `pycryptodome` for Windows
* Add option `--exec-before-download`
* Add field `live_status`
* [FFmpegMetadata] Add language of each stream and some refactoring
* [douyin] Add extractor by [pukkandan](https://github.com/pukkandan), [pyx](https://github.com/pyx)
* [pornflip] Add extractor by [mzbaulhaque](https://github.com/mzbaulhaque)
* **[youtube] Extract data from multiple clients** by [pukkandan](https://github.com/pukkandan), [coletdjnz](https://github.com/coletdjnz)
    * `player_client` now accepts multiple clients
    * Default `player_client` = `android,web`
        * This uses twice as many requests, but avoids throttling for most videos while also not losing any formats
    * Music clients can be specifically requested and are enabled by default for `music.youtube.com` URLs
    * Added `player_client=ios` (Known issue: formats from ios are not sorted correctly)
    * Add age-gate bypass for android and ios clients
* [youtube] Extract more thumbnails
    * The thumbnail URLs are hard-coded and their actual existence is tested lazily
    * Added option `--no-check-formats` to not test them
* [youtube] Misc fixes
    * Improve extraction of livestream metadata by [pukkandan](https://github.com/pukkandan), [krichbanana](https://github.com/krichbanana)
    * Hide live dash formats since they can't be downloaded anyway
    * Fix authentication when using multiple accounts by [coletdjnz](https://github.com/coletdjnz)
    * Fix controversial videos when requested via API by [coletdjnz](https://github.com/coletdjnz)
    * Fix session index extraction and headers for non-web player clients by [coletdjnz](https://github.com/coletdjnz)
    * Make `--extractor-retries` work for more errors
    * Fix sorting of 3gp format
    * Sanity check `chapters` (and refactor related code)
    * Make `parse_time_text` and `_extract_chapters` non-fatal
    * Misc cleanup and bug fixes by [coletdjnz](https://github.com/coletdjnz)
* [youtube:tab] Fix channels tab
* [youtube:tab] Extract playlist availability by [coletdjnz](https://github.com/coletdjnz)
* **[youtube:comments] Move comment extraction to new API** by [coletdjnz](https://github.com/coletdjnz)
    * Adds extractor-args `comment_sort` (`top`/`new`), `max_comments`, `max_comment_depth`
* [youtube:comments] Fix `is_favorited`, improve `like_count` parsing by [coletdjnz](https://github.com/coletdjnz)
* [BravoTV] Improve metadata extraction by [kevinoconnor7](https://github.com/kevinoconnor7)
* [crunchyroll:playlist] Force http
* [yahoo:gyao:player] Relax `_VALID_URL` by [nao20010128nao](https://github.com/nao20010128nao)
* [nebula] Authentication via tokens from cookie jar by [hheimbuerger](https://github.com/hheimbuerger), [TpmKranz](https://github.com/TpmKranz)
* [RTP] Fix extraction and add subtitles by [fstirlitz](https://github.com/fstirlitz)
* [viki] Rewrite extractors and add extractor-arg `video_types` to `vikichannel` by [zackmark29](https://github.com/zackmark29), [pukkandan](https://github.com/pukkandan)
* [vlive] Extract thumbnail directly in addition to the one from Naver
* [generic] Extract previously missed subtitles by [fstirlitz](https://github.com/fstirlitz)
* [generic] Extract everything in the SMIL manifest and detect discarded subtitles by [fstirlitz](https://github.com/fstirlitz)
* [embedthumbnail] Fix `_get_thumbnail_resolution`
* [metadatafromfield] Do not detect numbers as field names
* Fix selectors `all`, `mergeall` and add tests
* Errors in playlist extraction should obey `--ignore-errors`
* Fix bug where `original_url` was not propagated when `_type`=`url`
* Revert "Merge webm formats into mkv if thumbnails are to be embedded (#173)"
    * This was wrongly checking for `write_thumbnail`
* Improve `extractor_args` parsing
* Rename `NOTE` in `-F` to `MORE INFO` since it's often confused to be the same as `format_note`
* Add `only_once` param for `write_debug` and `report_warning`
* [extractor] Allow extracting multiple groups in `_search_regex` by [fstirlitz](https://github.com/fstirlitz)
* [utils] Improve `traverse_obj`
* [utils] Add `variadic`
* [utils] Improve `js_to_json` comment regex by [fstirlitz](https://github.com/fstirlitz)
* [webvtt] Fix timestamps
* [compat] Remove unnecessary code
* [docs] fix default of multistreams
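
Usage sketches for `--cookies-from-browser` as described above (the profile name and URL are placeholders):

```sh
# Load cookies straight from the default firefox profile
yt-dlp --cookies-from-browser firefox "https://example.com/watch?v=xxx"

# BROWSER[:PROFILE_NAME_OR_PATH] - pick a specific chrome profile
yt-dlp --cookies-from-browser "chrome:Profile 1" "https://example.com/watch?v=xxx"
```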

### 2021.07.07

* Merge youtube-dl: Upto [commit/a803582](https://github.com/ytdl-org/youtube-dl/commit/a8035827177d6b59aca03bd717acb6a9bdd75ada)
* Add `--extractor-args` to pass some extractor-specific arguments. See [readme](https://github.com/yt-dlp/yt-dlp#extractor-arguments) (example below)
    * Add extractor option `skip` for `youtube`, e.g. `--extractor-args youtube:skip=hls,dash`
    * Deprecates `--youtube-skip-dash-manifest`, `--youtube-skip-hls-manifest`, `--youtube-include-dash-manifest`, `--youtube-include-hls-manifest`
* Allow `--list...` options to work with `--print`, `--quiet` and other `--list...` options
* [youtube] Use `player` API for additional video extraction requests by [coletdjnz](https://github.com/coletdjnz)
    * **Fixes youtube premium music** (format 141) extraction
    * Adds extractor option `player_client` = `web`/`android`
        * **`--extractor-args youtube:player_client=android` works around the throttling** for the time-being
    * Adds extractor option `player_skip=config`
    * Adds age-gate fallback using embedded client
* [youtube] Choose correct Live chat API for upcoming streams by [krichbanana](https://github.com/krichbanana)
* [youtube] Fix subtitle names for age-gated videos
* [youtube:comments] Fix error handling and add `itct` to params by [coletdjnz](https://github.com/coletdjnz)
* [youtube_live_chat] Fix download with cookies by [siikamiika](https://github.com/siikamiika)
* [youtube_live_chat] use `clickTrackingParams` by [siikamiika](https://github.com/siikamiika)
* [Funimation] Rewrite extractor
    * Add `FunimationShowIE` by [Mevious](https://github.com/Mevious)
    * **Treat the different versions of an episode as different formats of a single video**
        * This changes the video `id` and will break existing archives
        * Compat option `seperate-video-versions` to fall back to old behavior including using the old video ids
    * Support direct `/player/` URL
    * Extractor options `language` and `version` to pre-select them during extraction
        * These options may be removed in the future if we can extract all formats without additional network requests
        * Do not rely on these for format selection and use `-f` filters instead
* [AdobePass] Add Spectrum MSO by [kevinoconnor7](https://github.com/kevinoconnor7), [ohmybahgosh](https://github.com/ohmybahgosh)
* [facebook] Extract description and fix title
* [fancode] Fix extraction, support live and allow login with refresh token by [zenerdi0de](https://github.com/zenerdi0de)
* [plutotv] Improve `_VALID_URL`
* [RCTIPlus] Add extractor by [MinePlayersPE](https://github.com/MinePlayersPE)
* [Soundcloud] Allow login using oauth token by [blackjack4494](https://github.com/blackjack4494)
* [TBS] Support livestreams by [llacb47](https://github.com/llacb47)
* [videa] Fix extraction by [nyuszika7h](https://github.com/nyuszika7h)
* [yahoo] Fix extraction by [llacb47](https://github.com/llacb47), [pukkandan](https://github.com/pukkandan)
* Process videos when using `--ignore-no-formats-error` by [krichbanana](https://github.com/krichbanana)
* Fix `--throttled-rate` when using `--load-info-json`
* Fix `--flat-playlist` when entry has no `ie_key`
* Fix `check_formats` catching `ExtractorError` instead of `DownloadError`
* Fix deprecated option `--list-formats-old`
* [downloader/ffmpeg] Fix `--ppa` when using simultaneous download
* [extractor] Prevent unnecessary download of hls manifests and refactor `hls_split_discontinuity`
* [fragment] Handle status of download and errors in threads correctly; and minor refactoring
* [thumbnailsconvertor] Treat `jpeg` as `jpg`
* [utils] Fix issues with `LazyList` reversal
* [extractor] Allow extractors to set their own login hint
* [cleanup] Simplify format selector code with `LazyList` and `yield from`
* [cleanup] Clean `extractor.common._merge_subtitles` signature
* [cleanup] Fix some typos
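
The `--extractor-args` invocations mentioned in the entries above, spelled out (the URL is a placeholder):

```sh
# Skip the hls/dash manifests on youtube
yt-dlp --extractor-args "youtube:skip=hls,dash" "https://example.com/watch?v=xxx"

# Work around throttling by using the android player client
yt-dlp --extractor-args "youtube:player_client=android" "https://example.com/watch?v=xxx"
```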

### 2021.06.23

* Merge youtube-dl: Upto [commit/379f52a](https://github.com/ytdl-org/youtube-dl/commit/379f52a4954013767219d25099cce9e0f9401961)
* **Add option `--throttled-rate`** below which video data is re-extracted
* [fragment] **Merge during download for `-N`**, and refactor `hls`/`dash`
* [websockets] Add `WebSocketFragmentFD` by [nao20010128nao](https://github.com/nao20010128nao), [pukkandan](https://github.com/pukkandan)
* Allow `images` formats in addition to video/audio
* [downloader/mhtml] Add new downloader for slideshows/storyboards by [fstirlitz](https://github.com/fstirlitz)
* [youtube] Temporary **fix for age-gate**
* [youtube] Support ongoing live chat by [siikamiika](https://github.com/siikamiika)
* [youtube] Improve SAPISID cookie handling by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Login is not needed for `:ytrec`
* [youtube] Non-fatal alert reporting for unavailable videos page by [coletdjnz](https://github.com/coletdjnz)
* [twitcasting] Websocket support by [nao20010128nao](https://github.com/nao20010128nao)
* [mediasite] Extract slides by [fstirlitz](https://github.com/fstirlitz)
* [funimation] Extract subtitles
* [pornhub] Extract `cast`
* [hotstar] Use server time for authentication instead of local time
* [EmbedThumbnail] Fix for already downloaded thumbnail
* [EmbedThumbnail] Add compat-option `embed-thumbnail-atomicparsley`
* Expand `--check-formats` to thumbnails
* Fix id sanitization in filenames
* Skip fixup of existing files and add `--fixup force` to force it
* Better error handling of syntax errors in `-f`
* Use `NamedTemporaryFile` for `--check-formats`
* [aria2c] Lower `--min-split-size` for HTTP downloads
* [options] Rename `--add-metadata` to `--embed-metadata`
* [utils] Improve `LazyList` and add tests
* [build] Build Windows x86 version with py3.7 and remove redundant tests by [pukkandan](https://github.com/pukkandan), [shirt](https://github.com/shirt-dev)
* [docs] Clarify that `--embed-metadata` embeds chapter markers
* [cleanup] Refactor fixup

### 2021.06.09

* Fix bug where `%(field)d` in filename template throws error
* [outtmpl] Improve offset parsing
* [test] More rigorous tests for `prepare_filename`

### 2021.06.08

* Remove support for obsolete Python versions: Only 3.6+ is now supported
* Merge youtube-dl: Upto [commit/c2350ca](https://github.com/ytdl-org/youtube-dl/commit/c2350cac243ba1ec1586fe85b0d62d1b700047a2)
* [hls] Fix decryption for multithreaded downloader
* [extractor] Fix pre-checking archive for some extractors
* [extractor] Fix FourCC fallback when parsing ISM by [fstirlitz](https://github.com/fstirlitz)
* [twitcasting] Add TwitCastingUserIE, TwitCastingLiveIE by [pukkandan](https://github.com/pukkandan), [nao20010128nao](https://github.com/nao20010128nao)
* [vidio] Add VidioPremierIE and VidioLiveIE by [MinePlayersPE](https://github.com/MinePlayersPE)
* [viki] Fix extraction from [ytdl-org/youtube-dl@59e583f](https://github.com/ytdl-org/youtube-dl/commit/59e583f7e8530ca92776c866897d895c072e2a82)
* [youtube] Support shorts URL
* [zoom] Extract transcripts as subtitles
* Add field `original_url` with the user-inputted URL
* Fix and refactor `prepare_outtmpl`
* Make more fields available for `--print` when used with `--flat-playlist`
* [utils] Generalize `traverse_dict` to `traverse_obj`
* [downloader/ffmpeg] Hide FFmpeg banner unless in verbose mode by [fstirlitz](https://github.com/fstirlitz)
* [build] Release `yt-dlp.tar.gz`
* [build,update] Add GNU-style SHA512 and prepare updater for similar SHA256 by [nihil-admirari](https://github.com/nihil-admirari)
* [pyinst] Show Python version in exe metadata by [nihil-admirari](https://github.com/nihil-admirari)
* [docs] Improve documentation of dependencies
* [cleanup] Mark unused files
* [cleanup] Point all shebang to `python3` by [fstirlitz](https://github.com/fstirlitz)
* [cleanup] Remove duplicate file `trovolive.py`

### 2021.06.01

* Merge youtube-dl: Upto [commit/d495292](https://github.com/ytdl-org/youtube-dl/commit/d495292852b6c2f1bd58bc2141ff2b0265c952cf)
* Pre-check archive and filters during playlist extraction
* Handle Basic Auth `user:pass` in URLs by [hhirtz](https://github.com/hhirtz) and [pukkandan](https://github.com/pukkandan)
* [archiveorg] Add YoutubeWebArchiveIE by [coletdjnz](https://github.com/coletdjnz) and [alex-gedeon](https://github.com/alex-gedeon)
* [fancode] Add extractor by [rhsmachine](https://github.com/rhsmachine)
* [patreon] Support vimeo embeds by [rhsmachine](https://github.com/rhsmachine)
* [Saitosan] Add new extractor by [llacb47](https://github.com/llacb47)
* [ShemarooMe] Add extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
* [telemundo] Add extractor by [king-millez](https://github.com/king-millez)
* [SonyLIV] Add SonyLIVSeriesIE and subtitle support by [Ashish0804](https://github.com/Ashish0804)
* [Hotstar] Add HotStarSeriesIE by [Ashish0804](https://github.com/Ashish0804)
* [Voot] Add VootSeriesIE by [Ashish0804](https://github.com/Ashish0804)
* [vidio] Support login and premium videos by [MinePlayersPE](https://github.com/MinePlayersPE)
* [fragment] When using `-N`, do not keep the fragment content in memory
* [ffmpeg] Download and merge in a single step if possible
* [ThumbnailsConvertor] Support conversion to `png` and make it the default by [louie-github](https://github.com/louie-github)
* [VideoConvertor] Generalize with remuxer and allow conditional recoding
* [EmbedThumbnail] Embed in `mp4`/`m4a` using mutagen by [tripulse](https://github.com/tripulse) and [pukkandan](https://github.com/pukkandan)
* [EmbedThumbnail] Embed if any thumbnail was downloaded, not just the best
* [EmbedThumbnail] Correctly escape filename
* [update] replace self without launching a subprocess in windows
* [update] Block further update for unsupported systems
* Refactor `__process_playlist` by creating `LazyList`
* Write messages to `stderr` when both `quiet` and `verbose`
* Sanitize and sort playlist thumbnails
* Remove `None` values from `info.json`
* [extractor] Always prefer native hls downloader by default
* [extractor] Skip subtitles without URI in m3u8 manifests by [hheimbuerger](https://github.com/hheimbuerger)
* [extractor] Functions to parse `socket.io` response as `json` by [pukkandan](https://github.com/pukkandan) and [llacb47](https://github.com/llacb47)
* [extractor] Allow `note=False` when extracting manifests
* [utils] Escape URLs in `sanitized_Request`, not `sanitize_url`
* [hls] Disable external downloader for `webvtt`
* [youtube] `/live` URLs should raise error if channel is not live
* [youtube] Bug fixes
* [zee5] Fix m3u8 formats' extension
* [ard] Allow URLs without `-` before id by [olifre](https://github.com/olifre)
* [cleanup] `YoutubeDL._match_entry`
* [cleanup] Refactor updater
* [cleanup] Refactor ffmpeg convertors
* [cleanup] setup.py

### 2021.05.20

* **Youtube improvements**:
    * Support youtube music `MP`, `VL` and `browse` pages
    * Extract more formats for youtube music by [craftingmod](https://github.com/craftingmod), [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan)
    * Extract multiple subtitles in same language by [pukkandan](https://github.com/pukkandan) and [tpikonen](https://github.com/tpikonen)
    * Redirect channels that don't have a `videos` tab to their `UU` playlists
    * Support in-channel search
    * Sort audio-only formats correctly
    * Always extract `maxresdefault` thumbnail
    * Extract audio language
    * Add subtitle language names by [nixxo](https://github.com/nixxo) and [tpikonen](https://github.com/tpikonen)
    * Show alerts only from the final webpage
    * Add `html5=1` param to `get_video_info` page requests by [coletdjnz](https://github.com/coletdjnz)
    * Better message when login required
* **Add option `--print`**: to print any field/template (example below)
    * Makes redundant: `--get-description`, `--get-duration`, `--get-filename`, `--get-format`, `--get-id`, `--get-thumbnail`, `--get-title`, `--get-url`
* Field `additional_urls` to download additional videos from metadata using [`--parse-metadata`](https://github.com/yt-dlp/yt-dlp#modifying-metadata)
* Merge youtube-dl: Upto [commit/dfbbe29](https://github.com/ytdl-org/youtube-dl/commit/dfbbe2902fc67f0f93ee47a8077c148055c67a9b)
* Write thumbnail of playlist and add `pl_thumbnail` outtmpl key
* [embedthumbnail] Add `flac` support and refactor `mutagen` code by [pukkandan](https://github.com/pukkandan) and [tripulse](https://github.com/tripulse)
* [audius:artist] Add extractor by [king-millez](https://github.com/king-millez)
* [parlview] Add extractor by [king-millez](https://github.com/king-millez)
* [tenplay] Fix extractor by [king-millez](https://github.com/king-millez)
* [rmcdecouverte] Generalize `_VALID_URL`
* Add compat-option `no-attach-infojson`
* Add field `name` for subtitles
* Ensure `post_extract` and `pre_process` only run once
* Fix `--check-formats` when there is network error
* Standardize `write_debug` and `get_param`
* [options] Alias `--write-comments`, `--no-write-comments`
* [options] Refactor callbacks
* [test:download] Only extract enough videos for `playlist_mincount`
* [extractor] bugfix for when `compat_opts` is not given
* [build] Fix x86 build by [shirt](https://github.com/shirt-dev)
* [cleanup] code formatting, youtube tests and readme
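
Illustrations of `--print` and the new `additional_urls` field (the regex is a simplified placeholder; see the readme for real patterns):

```sh
# Print any field or template; replaces the old --get-* options
yt-dlp --print "%(title)s [%(id)s]" "https://example.com/watch?v=xxx"

# Queue any URL found in the description as an additional download
yt-dlp --parse-metadata "description:(?P<additional_urls>https?://\S+)" "https://example.com/watch?v=xxx"
```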

### 2021.05.11

* **Deprecate support for Python versions < 3.6**
* **Subtitle extraction from manifests** by [fstirlitz](https://github.com/fstirlitz). See [be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details
* **Improve output template:** (example below)
    * Allow slicing lists/strings using `field.start:end:step`
    * A field can also be used as offset like `field1+num+field2`
    * A default value can be given using `field|default`
    * Prevent invalid fields from causing errors
* **Merge youtube-dl**: Upto [commit/a726009](https://github.com/ytdl-org/youtube-dl/commit/a7260099873acc6dc7d76cafad2f6b139087afd0)
* **Remove options** `-l`, `-t`, `-A` completely and disable `--auto-number`, `--title`, `--literal`, `--id`
* [Plugins] Prioritize plugins over standard extractors and prevent plugins from overwriting the standard extractor classes
* [downloader] Fix `quiet` and `to_stderr`
* [fragment] Ensure the file is closed on error
* [fragment] Make sure first segment is not skipped
* [aria2c] Fix whitespace being stripped off
* [embedthumbnail] Fix bug where jpeg thumbnails were converted again
* [FormatSort] Fix for when some formats have quality and others don't
* [utils] Add `network_exceptions`
* [utils] Escape URL while sanitizing
* [ukcolumn] Add Extractor
* [whowatch] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
* [CBS] Improve `_VALID_URL` to support movies
* [crackle] Improve extraction
* [curiositystream] Fix collections
* [francetvinfo] Improve video id extraction
* [generic] Respect the encoding in manifest
* [limelight] Obey `allow_unplayable_formats`
* [mediasite] Generalize URL pattern by [fstirlitz](https://github.com/fstirlitz)
* [mxplayer] Add MxplayerShowIE by [Ashish0804](https://github.com/Ashish0804)
* [nebula] Move to nebula.app by [Lamieur](https://github.com/Lamieur)
* [niconico] Fix HLS formats by [CXwudi](https://github.com/CXwudi), [tsukumijima](https://github.com/tsukumijima), [nao20010128nao](https://github.com/nao20010128nao) and [pukkandan](https://github.com/pukkandan)
* [niconico] Fix title and thumbnail extraction by [CXwudi](https://github.com/CXwudi)
* [plutotv] Extract subtitles from manifests
* [plutotv] Fix format extraction for some urls
* [rmcdecouverte] Improve `_VALID_URL`
* [sonyliv] Fix `title` and `series` extraction by [Ashish0804](https://github.com/Ashish0804)
* [tubi] Raise "no video formats" error when video url is empty
* [youtube:tab] Detect playlists inside community posts
* [youtube] Add `oembed` to reserved names
* [zee5] Fix extraction for some URLs by [Hadi0609](https://github.com/Hadi0609)
* [zee5] Fix py2 compatibility
* Fix `playlist_index` and add `playlist_autonumber`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details
* Add experimental option `--check-formats` to test the URLs before format selection
* Option `--compat-options` to revert [some of yt-dlp's changes](https://github.com/yt-dlp/yt-dlp#differences-in-default-behavior)
    * Deprecates `--list-formats-as-table`, `--list-formats-old`
* Fix number of digits in `%(playlist_index)s`
* Fix case sensitivity of format selector
* Revert "[core] be able to hand over id and title using url_result"
* Do not strip out whitespaces in `-o` and `-P`
* Fix `preload_download_archive` writing verbose message to `stdout`
* Move option warnings to `YoutubeDL` so that they obey `--no-warnings` and can output colors
* Py2 compatibility for `FileNotFoundError`
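
A sketch of the improved output template from this release, assuming the documented `field.start:end:step` slice and `field|default` forms:

```sh
# First 50 characters of the title; fall back to "Unknown" if uploader is missing
yt-dlp -o "%(title.:50)s - %(uploader|Unknown)s.%(ext)s" "https://example.com/watch?v=xxx"
```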

### 2021.04.22

* **Improve output template:**
    * Objects can be traversed like `%(field.key1.key2)s`
    * An offset can be added to numeric fields as `%(field+N)s`
    * Deprecates `--autonumber-start`
* **Improve `--sub-langs`:**
    * Treat `--sub-langs` entries as regex
    * `all` can be used to refer to all the subtitles
    * Language codes can be prefixed with `-` to exclude them
    * Deprecates `--all-subs`
* Add option `--ignore-no-formats-error` to ignore the "no video format" and similar errors
* Add option `--skip-playlist-after-errors` to skip the rest of a playlist after a given number of errors are encountered
* Merge youtube-dl: Upto [commit/7e8b3f9](https://github.com/ytdl-org/youtube-dl/commit/7e8b3f9439ebefb3a3a4e5da9c0bd2b595976438)
* [downloader] Fix bug in downloader selection
* [BilibiliChannel] Fix pagination by [nao20010128nao](https://github.com/nao20010128nao) and [pukkandan](https://github.com/pukkandan)
* [rai] Add support for http formats by [nixxo](https://github.com/nixxo)
* [TubiTv] Add TubiTvShowIE by [Ashish0804](https://github.com/Ashish0804)
* [twitcasting] Fix extractor
* [viu:ott] Fix extractor and support series by [lkho](https://github.com/lkho) and [pukkandan](https://github.com/pukkandan)
* [youtube:tab] Show unavailable videos in playlists by [coletdjnz](https://github.com/coletdjnz)
* [youtube:tab] Reload with unavailable videos for all playlists
* [youtube] Ignore invalid stretch ratio
* [youtube] Improve channel syncid extraction to support ytcfg by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Standardize API calls for tabs, mixes and search by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Bugfix in `_extract_ytcfg`
* [mildom:user:vod] Download only necessary amount of pages
* [mildom] Remove proxy completely by [fstirlitz](https://github.com/fstirlitz)
* [go] Fix `_VALID_URL`
* [MetadataFromField] Improve regex and add tests
* [Exec] Ensure backward compatibility when the command contains `%`
* [extractor] Fix inconsistent use of `report_warning`
* Ensure `mergeall` selects best format when multistreams are disabled
* Improve the yt-dlp.sh script by [fstirlitz](https://github.com/fstirlitz)
* [lazy_extractor] Do not load plugins
* [ci] Disable fail-fast
* [docs] Clarify which deprecated options still work
* [docs] Fix typos

### 2021.04.11

* Add option `--convert-thumbnails` (only jpg currently supported)
* Format selector `mergeall` to download and merge all formats
* Pass any field to `--exec` using similar syntax to output template
* Choose downloader for each protocol using `--downloader PROTO:NAME` (example below)
    * Alias `--downloader` for `--external-downloader`
    * Added `native` as an option for the downloader
* Merge youtube-dl: Upto [commit/4fb25ff](https://github.com/ytdl-org/youtube-dl/commit/4fb25ff5a3be5206bb72e5c4046715b1529fb2c7) (except vimeo)
* [DiscoveryPlusIndia] Add DiscoveryPlusIndiaShowIE by [Ashish0804](https://github.com/Ashish0804)
* [NFHSNetwork] Add extractor by [llacb47](https://github.com/llacb47)
* [nebula] Add extractor (watchnebula.com) by [hheimbuerger](https://github.com/hheimbuerger)
* [nitter] Fix extraction of reply tweets and update instance list by [B0pol](https://github.com/B0pol)
* [nitter] Fix thumbnails by [B0pol](https://github.com/B0pol)
* [youtube] Fix thumbnail URL
* [youtube] Parse API parameters from initial webpage by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Extract comments' approximate timestamp by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Fix alert extraction
* [bilibili] Fix uploader
* [utils] Add `datetime_from_str` and `datetime_add_months` by [coletdjnz](https://github.com/coletdjnz)
* Run some `postprocessors` before actual download
* Improve argument parsing for `-P`, `-o`, `-S`
* Fix some `m3u8` not obeying `--allow-unplayable-formats`
* Fix default of `dynamic_mpd`
* Deprecate `--all-formats`, `--include-ads`, `--hls-prefer-native`, `--hls-prefer-ffmpeg`
* [docs] Improvements
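
For instance, per-protocol downloader selection with the new `PROTO:NAME` syntax might look like this (illustrative):

```sh
# aria2c for plain http(s), but the native downloader for DASH and HLS
yt-dlp --downloader aria2c --downloader "dash,m3u8:native" "https://example.com/watch?v=xxx"
```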

### 2021.04.03

* Merge youtube-dl: Upto [commit/654b4f4](https://github.com/ytdl-org/youtube-dl/commit/654b4f4ff2718f38b3182c1188c5d569c14cc70a)
* Ability to set a specific field in the file's metadata using `--parse-metadata`
* Ability to select n'th best format like `-f bv*.2`
* [DiscoveryPlus] Add discoveryplus.in
* [la7] Add podcasts and podcast playlists by [nixxo](https://github.com/nixxo)
* [mildom] Update extractor with current proxy by [nao20010128nao](https://github.com/nao20010128nao)
* [ard:mediathek] Fix video id extraction
* [generic] Detect Invidious' link element
* [youtube] Show premium state in `availability` by [coletdjnz](https://github.com/coletdjnz)
* [viewsource] Add extractor to handle `view-source:`
* [sponskrub] Run before embedding thumbnail
* [docs] Improve `--parse-metadata` documentation

### 2021.03.24.1

* Revert [commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf)

### 2021.03.24

* Merge youtube-dl: Upto 2021.03.25 ([commit/8562218](https://github.com/ytdl-org/youtube-dl/commit/8562218350a79d4709da8593bb0c538aa0824acf))
* Parse metadata from multiple fields using `--parse-metadata`
* Ability to load playlist infojson using `--load-info-json`
* Write current epoch to infojson when using `--no-clean-infojson`
* [youtube_live_chat] fix bug when trying to set cookies
* [niconico] Fix for when logged in by [CXwudi](https://github.com/CXwudi) and [xtkoba](https://github.com/xtkoba)
* [linuxacademy] Fix login

### 2021.03.21

* Merge youtube-dl: Upto [commit/7e79ba7](https://github.com/ytdl-org/youtube-dl/commit/7e79ba7dd6e6649dd2ce3a74004b2044f2182881)
* Option `--no-clean-infojson` to keep private keys in the infojson
* [aria2c] Support retry/abort unavailable fragments by [damianoamatruda](https://github.com/damianoamatruda)
* [aria2c] Better default arguments
* [movefiles] Fix bugs and make more robust
* [formatSort] Fix `quality` being ignored
* [splitchapters] Fix for older ffmpeg
* [sponskrub] Pass proxy to sponskrub
* Make sure `post_hook` gets the final filename
* Recursively remove any private keys from infojson
* Embed video URL metadata inside `mp4` by [damianoamatruda](https://github.com/damianoamatruda) and [pukkandan](https://github.com/pukkandan)
* Merge `webm` formats into `mkv` if thumbnails are to be embedded by [damianoamatruda](https://github.com/damianoamatruda)
* Use headers and cookies when downloading subtitles by [damianoamatruda](https://github.com/damianoamatruda)
* Parse resolution in info dictionary by [damianoamatruda](https://github.com/damianoamatruda)
* More consistent warning messages by [damianoamatruda](https://github.com/damianoamatruda) and [pukkandan](https://github.com/pukkandan)
* [docs] Add deprecated options and aliases in readme
* [docs] Fix some minor mistakes
* [niconico] Partial fix adapted from [animelover1984/youtube-dl@b5eff52](https://github.com/animelover1984/youtube-dl/commit/b5eff52dd9ed5565672ea1694b38c9296db3fade) (login and smile formats still don't work)
* [niconico] Add user extractor by [animelover1984](https://github.com/animelover1984)
* [bilibili] Add anthology support by [animelover1984](https://github.com/animelover1984)
* [amcnetworks] Fix extractor by [2ShedsJackson](https://github.com/2ShedsJackson)
* [stitcher] Merge from youtube-dl by [nixxo](https://github.com/nixxo)
* [rcs] Improved extraction by [nixxo](https://github.com/nixxo)
* [linuxacademy] Improve regex
* [youtube] Show if video is `private`, `unlisted` etc in info (`availability`) by [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan)
* [youtube] bugfix for channel playlist extraction
* [nbc] Improve metadata extraction by [2ShedsJackson](https://github.com/2ShedsJackson)

### 2021.03.15

* **Split video by chapters**: using option `--split-chapters` (example below)
    * The output file of the split files can be set with `-o`/`-P` using the prefix `chapter:`
    * Additional keys `section_title`, `section_number`, `section_start`, `section_end` are available in the output template
* **Parallel fragment downloads** by [shirt](https://github.com/shirt-dev)
    * Use option `--concurrent-fragments` (`-N`) to set the number of threads (default 1)
* Merge youtube-dl: Upto [commit/3be0980](https://github.com/ytdl-org/youtube-dl/commit/3be098010f667b14075e3dfad1e74e5e2becc8ea)
* [zee5] Add Show Extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan)
* [rai] fix drm check [nixxo](https://github.com/nixxo)
* [wimtv] Add extractor by [nixxo](https://github.com/nixxo)
* [mtv] Add mtv.it and extract series metadata by [nixxo](https://github.com/nixxo)
* [pluto.tv] Add extractor by [kevinoconnor7](https://github.com/kevinoconnor7)
* [youtube] Rewrite comment extraction by [coletdjnz](https://github.com/coletdjnz)
* [embedthumbnail] Set mtime correctly
* Refactor some postprocessor/downloader code by [pukkandan](https://github.com/pukkandan) and [shirt](https://github.com/shirt-dev)
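
Sketches of the two headline features above (the output path and URL are placeholders):

```sh
# Split at chapter marks, naming pieces with the new section_* keys
yt-dlp --split-chapters -o "chapter:%(title)s/%(section_number)03d - %(section_title)s.%(ext)s" \
    "https://example.com/watch?v=xxx"

# Download 4 fragments in parallel
yt-dlp --concurrent-fragments 4 "https://example.com/watch?v=xxx"
```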
[pukkandan](https://github.com/pukkandan) and [animelover1984](https://github.com/animelover1984) * [trovo] Pass origin header * [rai] Check for DRM by [nixxo](https://github.com/nixxo) * [downloader] Fix bug for `ffmpeg`/`httpie` * [update] Fix updater removing the executable bit on some UNIX distros * [update] Fix current build hash for UNIX * [docs] Include wget/curl/aria2c install instructions for Unix by [Ashish0804](https://github.com/Ashish0804) * Fix some videos downloading with `m3u8` extension * Remove "fixup is ignored" warning when fixup wasn't passed by user ### 2021.03.03.2 * [build] Fix bug ### 2021.03.03 * [youtube] Use new browse API for continuation page extraction by [coletdjnz](https://github.com/coletdjnz) and [pukkandan](https://github.com/pukkandan) * Fix HLS playlist downloading by [shirt](https://github.com/shirt-dev) * Merge youtube-dl: Upto [2021.03.03](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.03.03) * [mtv] Fix extractor * [nick] Fix extractor by [DennyDai](https://github.com/DennyDai) * [mxplayer] Add new extractor by [codeasashu](https://github.com/codeasashu) * [youtube] Throw error when `--extractor-retries` are exhausted * Reduce default of `--extractor-retries` to 3 * Fix packaging bugs by [hseg](https://github.com/hseg) ### 2021.03.01 * Allow specifying path in `--external-downloader` * Add option `--sleep-requests` to sleep b/w requests * Add option `--extractor-retries` to retry on known extractor errors * Extract comments only when needed * `--get-comments` doesn't imply `--write-info-json` if `-J`, `-j` or `--print-json` are used * Fix `get_executable_path` by [shirt](https://github.com/shirt-dev) * [youtube] Retry on more known errors than just HTTP-5xx * [youtube] Fix inconsistent `webpage_url` * [tennistv] Fix format sorting * [bilibiliaudio] Recognize the file as audio-only * [hrfensehen] Fix wrong import * [viki] Fix viki play pass authentication by [RobinD42](https://github.com/RobinD42) * [readthedocs] Improvements by [shirt](https://github.com/shirt-dev) * [hls] Fix bug with m3u8 format extraction * [hls] Enable `--hls-use-mpegts` by default when downloading live-streams * [embedthumbnail] Fix bug with deleting original thumbnail * [build] Fix completion paths, zsh pip completion install by [hseg](https://github.com/hseg) * [ci] Disable download tests unless specifically invoked * Cleanup some code and fix typos ### 2021.02.24 * Moved project to an organization [yt-dlp](https://github.com/yt-dlp) * **Completely changed project name to yt-dlp** by [Pccode66](https://github.com/Pccode66) and [pukkandan](https://github.com/pukkandan) * Also, `youtube-dlc` config files are no longer loaded * Merge youtube-dl: Upto [commit/4460329](https://github.com/ytdl-org/youtube-dl/commit/44603290e5002153f3ebad6230cc73aef42cc2cd) (except tmz, gedi) * [Readthedocs](https://yt-dlp.readthedocs.io) support by [shirt](https://github.com/shirt-dev) * [youtube] Show if video was a live stream in info (`was_live`) * [Zee5] Add new extractor by [Ashish0804](https://github.com/Ashish0804) and [pukkandan](https://github.com/pukkandan) * [jwplatform] Add support for `hyland.com` * [tennistv] Fix extractor * [hls] Support media initialization by [shirt](https://github.com/shirt-dev) * [hls] Added options `--hls-split-discontinuity` to better support media discontinuity by [shirt](https://github.com/shirt-dev) * [ffmpeg] Allow passing custom arguments before -i using `--ppa "ffmpeg_i1:ARGS"` syntax * Fix `--windows-filenames` removing `/` from UNIX paths * 
[hls] Show warning if pycryptodome is not found * [docs] Improvements * Fix documentation of `Extractor Options` * Document `all` in format selection * Document `playable_in_embed` in output templates ### 2021.02.19 * Merge youtube-dl: Upto [commit/cf2dbec](https://github.com/ytdl-org/youtube-dl/commit/cf2dbec6301177a1fddf72862de05fa912d9869d) (except kakao) * [viki] Fix extractor * [niconico] Extract `channel` and `channel_id` by [kurumigi](https://github.com/kurumigi) * [youtube] Multiple page support for hashtag URLs * [youtube] Add more invidious instances * [youtube] Fix comment extraction when comment text is empty * Option `--windows-filenames` to force use of windows compatible filenames * [ExtractAudio] Bugfix * Don't raise `parser.error` when exiting for update * [MoveFiles] Fix for when merger can't run * Changed `--trim-file-name` to `--trim-filenames` to be similar to related options * Format Sort improvements: * Prefer `vp9.2` more than other `vp9` codecs * Remove forced priority of `quality` * Remove unnecessary `field_preference` and misuse of `preference` from extractors * Build improvements: * Fix hash output by [shirt](https://github.com/shirt-dev) * Lock Python package versions for x86 and use `wheels` by [shirt](https://github.com/shirt-dev) * Exclude `vcruntime140.dll` from UPX by [jbruchon](https://github.com/jbruchon) * Set version number based on UTC time, not local time * Publish on PyPi only if token is set * [docs] Better document `--prefer-free-formats` and add `--no-prefer-free-format` ### 2021.02.15 * Merge youtube-dl: Upto [2021.02.10](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.10) (except archive.org) * [niconico] Improved extraction and support encrypted/SMILE movies by [kurumigi](https://github.com/kurumigi), [tsukumijima](https://github.com/tsukumijima), [bbepis](https://github.com/bbepis), [pukkandan](https://github.com/pukkandan) * Fix HLS AES-128 with multiple keys in external downloaders by [shirt](https://github.com/shirt-dev) * [youtube_live_chat] Fix by using POST API by [siikamiika](https://github.com/siikamiika) * [rumble] Add support for video page * Option `--allow-unplayable-formats` to allow downloading unplayable video formats * [ExtractAudio] Don't re-encode when file is already in a common audio format * [youtube] Fix search continuations * [youtube] Fix for new accounts * Improve build/updater: by [pukkandan](https://github.com/pukkandan) and [shirt](https://github.com/shirt-dev) * Fix SHA256 calculation in build and implement hash checking for updater * Exit immediately in windows once the update process starts * Fix updater for `x86.exe` * Updater looks for both `yt-dlp` and `youtube-dlc` in releases for future-proofing * Change optional dependency to `pycryptodome` * Fix issue with unicode filenames in aria2c by [shirt](https://github.com/shirt-dev) * Fix `allow_playlist_files` not being correctly passed through * Fix for empty HTTP head requests by [shirt](https://github.com/shirt-dev) * Fix `get_executable_path` in UNIX * [sponskrub] Print ffmpeg output and errors to terminal * `__real_download` should be false when ffmpeg unavailable and no download * Show `exe`/`zip`/`source` and 32/64bit in verbose message ### 2021.02.09 * **aria2c support for DASH/HLS**: by [shirt](https://github.com/shirt-dev) * **Implement Updater** (`-U`) by [shirt](https://github.com/shirt-dev) * [youtube] Fix comment extraction * [youtube_live_chat] Improve extraction * [youtube] Fix for channel URLs sometimes not downloading all pages * 
[aria2c] Changed default arguments to `--console-log-level=warn --summary-interval=0 --file-allocation=none -x16 -j16 -s16` * Add fallback for thumbnails * [embedthumbnail] Keep original thumbnail after conversion if write_thumbnail given * [embedsubtitle] Keep original subtitle after conversion if write_subtitles given * [pyinst.py] Move back to root dir * [youtube] Simplified renderer parsing and bugfixes * [movefiles] Fix compatibility with python2 * [remuxvideo] Fix validation of conditional remux * [sponskrub] Don't raise error when the video does not exist * [docs] Crypto is an optional dependency ### 2021.02.04 * Merge youtube-dl: Upto [2021.02.04.1](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.02.04.1) * **Date/time formatting in output template:** * You can use [`strftime`](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) to format date/time fields. Example: `%(upload_date>%Y-%m-%d)s` * **Multiple output templates:** * Separate output templates can be given for the different metadata files by using `-o TYPE:TEMPLATE` * The allowed types are: `subtitle|thumbnail|description|annotation|infojson|pl_description|pl_infojson` * [youtube] More metadata extraction for channel/playlist URLs (channel, uploader, thumbnail, tags) * New option `--no-write-playlist-metafiles` to prevent writing playlist metadata files * [audius] Fix extractor * [youtube_live_chat] Fix `parse_yt_initial_data` and add `fragment_retries` * [postprocessor] Raise errors correctly * [metadatafromtitle] Fix bug when extracting data from numeric fields * Fix issue with overwriting files * Fix "Default format spec" appearing in quiet mode * [FormatSort] Allow user to prefer av01 over vp9 (The default is still vp9) * [FormatSort] fix bug where `quality` had more priority than `hasvid` * [pyinst] Automatically detect Python architecture and working directory * Strip out internal fields such as `_filename` from infojson ### 2021.01.29 * **Features from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl)**: by [animelover1984](https://github.com/animelover1984) and [bbepis](https://github.com/bbepis) * Add `--get-comments` * [youtube] Extract comments * [billibilli] Added BiliBiliSearchIE, BilibiliChannelIE * [billibilli] Extract comments * [billibilli] Better video extraction * Write playlist data to infojson * [FFmpegMetadata] Embed infojson inside the video * [EmbedThumbnail] Try embedding in mp4 using ffprobe and `-disposition` * [EmbedThumbnail] Treat mka like mkv and mov like mp4 * [EmbedThumbnail] Embed in ogg/opus * [VideoRemuxer] Conditionally remux video * [VideoRemuxer] Add `-movflags +faststart` when remuxing to mp4 * [ffmpeg] Print entire stderr in verbose when there is error * [EmbedSubtitle] Warn when embedding ass in mp4 * [anvato] Use NFLTokenGenerator if possible * **Parse additional metadata**: New option `--parse-metadata` to extract additional metadata from existing fields * The extracted fields can be used in `--output` * Deprecated `--metadata-from-title` * [Audius] Add extractor * [youtube] Extract playlist description and write it to `.description` file * Detect existing files even when using `recode`/`remux` (`extract-audio` is partially fixed) * Fix wrong user config from v2021.01.24 * [youtube] Report error message from youtube as error instead of warning * [FormatSort] Fix some fields not sorting from v2021.01.24 * [postprocessor] Deprecate `avconv`/`avprobe`. All current functionality is left untouched. 
* [postprocessor] fix `write_debug` to not throw error when there is no `_downloader`
* [movefiles] Don't give "cant find" warning when move is unnecessary
* Refactor `update-version`, `pyinst.py` and related files
* [ffmpeg] Document more formats that are supported for remux/recode

### 2021.01.24
* Merge youtube-dl: Upto [2021.01.24](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
* Plugin support ([documentation](https://github.com/yt-dlp/yt-dlp#plugins))
* **Multiple paths**: New option `-P`/`--paths` to give different paths for different types of files
    * The syntax is `-P "type:path" -P "type:path"`
    * Valid types are: home, temp, description, annotation, subtitle, infojson, thumbnail
    * Additionally, configuration file is taken from home directory or current directory
* Allow passing different arguments to different external downloaders
* [mildom] Add extractor by [nao20010128nao](https://github.com/nao20010128nao)
* Warn when using old style `--external-downloader-args` and `--post-processor-args`
* Fix `--no-overwrite` when using `--write-link`
* [sponskrub] Output `unrecognized argument` error message correctly
* [cbs] Make failure to extract title non-fatal
* Fix typecasting when pre-checking archive
* Fix issue with setting title on UNIX
* Deprecate redundant aliases in `formatSort`. The aliases remain functional for backward compatibility, but will be left undocumented
* [tests] Fix test_post_hooks
* [tests] Split core and download tests

### 2021.01.20
* [TrovoLive] Add extractor (only VODs)
* [pokemon] Add `/#/player` URLs
* Improved parsing of multiple postprocessor-args, add `--ppa` as alias
* [EmbedThumbnail] Simplify embedding in mkv
* [sponskrub] Encode filenames correctly, better debug output and error message
* [readme] Cleanup options

### 2021.01.16
* Merge youtube-dl: Upto [2021.01.16](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.16)
* **Configuration files:**
    * Portable configuration file: `./yt-dlp.conf`
    * Allow the configuration files to be named `yt-dlp` instead of `youtube-dlc`. See [this](https://github.com/yt-dlp/yt-dlp#configuration) for details
* Add PyPI release

### 2021.01.14
* Added option `--break-on-reject`
* [roosterteeth.com] Fix for bonus episodes by [Zocker1999NET](https://github.com/Zocker1999NET)
* [tiktok] Fix for when share_info is empty
* [EmbedThumbnail] Fix bug due to incorrect function name
* [docs] Changed sponskrub links to point to [yt-dlp/SponSkrub](https://github.com/yt-dlp/SponSkrub) since I am now providing both linux and windows releases
* [docs] Change all links to correctly point to new fork URL
* [docs] Fixes typos

### 2021.01.12
* [roosterteeth.com] Add subtitle support by [samiksome](https://github.com/samiksome)
* Added `--force-overwrites`, `--no-force-overwrites` by [alxnull](https://github.com/alxnull)
* Changed fork name to `yt-dlp`
* Fix typos by [FelixFrog](https://github.com/FelixFrog)
* [ci] Option to skip
* [changelog] Added unreleased changes in blackjack4494/yt-dlc

### 2021.01.10
* [archive.org] Fix extractor and add support for audio and playlists by [wporr](https://github.com/wporr)
* [Animelab] Added by [mariuszskon](https://github.com/mariuszskon)
* [youtube:search] Fix view_count by [ohnonot](https://github.com/ohnonot)
* [youtube] Show if video is embeddable in info (`playable_in_embed`)
* Update version badge automatically in README
* Enable `test_youtube_search_matching`
* Create `to_screen` and similar functions in postprocessor/common

### 2021.01.09
* [youtube] Fix bug in automatic caption extraction
* Add `post_hooks` to YoutubeDL by [alexmerkel](https://github.com/alexmerkel)
* Batch file enumeration improvements by [glenn-slayden](https://github.com/glenn-slayden)
* Stop immediately when reaching `--max-downloads` by [glenn-slayden](https://github.com/glenn-slayden)
* Fix incorrect ANSI sequence for restoring console-window title by [glenn-slayden](https://github.com/glenn-slayden)
* Kill child processes when yt-dlc is killed by [Unrud](https://github.com/Unrud)

### 2021.01.08
* Merge youtube-dl: Upto [2021.01.08](https://github.com/ytdl-org/youtube-dl/releases/tag/2021.01.08) except stitcher ([1](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc), [2](https://github.com/ytdl-org/youtube-dl/commit/a563c97c5cddf55f8989ed7ea8314ef78e30107f))
* Moved changelog to separate file

### 2021.01.07-1
* [Akamai] fix by [nixxo](https://github.com/nixxo)
* [Tiktok] merge youtube-dl tiktok extractor by [GreyAlien502](https://github.com/GreyAlien502)
* [vlive] add support for playlists by [kyuyeunk](https://github.com/kyuyeunk)
* [youtube_live_chat] make sure playerOffsetMs is positive by [siikamiika](https://github.com/siikamiika)
* Ignore extra data streams in ffmpeg by [jbruchon](https://github.com/jbruchon)
* Allow passing different arguments to different postprocessors using `--postprocessor-args`
* Deprecated `--sponskrub-args`. The same can now be done using `--postprocessor-args "sponskrub:<args>"`
* [CI] Split tests into core-test and full-test

### 2021.01.07
* Removed priority of `av01` codec in `-S` since most devices don't support it yet
* Added `duration_string` to be used in `--output`
* Created First Release

### 2021.01.05-1
* **Changed defaults:**
    * Enabled `--ignore`
    * Disabled `--video-multistreams` and `--audio-multistreams`
    * Changed default format selection to `bv*+ba/b` when `--audio-multistreams` is disabled
    * Changed default format sort order to `res,fps,codec,size,br,asr,proto,ext,has_audio,source,format_id`
    * Changed `webm` to be more preferable than `flv` in format sorting
    * Changed default output template to `%(title)s [%(id)s].%(ext)s`
    * Enabled `--list-formats-as-table`

### 2021.01.05
* **Format Sort:** Added `--format-sort` (`-S`), `--format-sort-force` (`--S-force`) - See [Sorting Formats](README.md#sorting-formats) for details
* **Format Selection:** See [Format Selection](README.md#format-selection) for details
    * New format selectors: `best*`, `worst*`, `bestvideo*`, `bestaudio*`, `worstvideo*`, `worstaudio*`
    * Changed video format sorting to show video only files and video+audio files together
    * Added `--video-multistreams`, `--no-video-multistreams`, `--audio-multistreams`, `--no-audio-multistreams`
    * Added `b`,`w`,`v`,`a` as alias for `best`, `worst`, `video` and `audio` respectively
* Shortcut Options: Added `--write-link`, `--write-url-link`, `--write-webloc-link`, `--write-desktop-link` by [h-h-h-h](https://github.com/h-h-h-h) - See [Internet Shortcut Options](README.md#internet-shortcut-options) for details
* **Sponskrub integration:** Added `--sponskrub`, `--sponskrub-cut`, `--sponskrub-force`, `--sponskrub-location`, `--sponskrub-args` - See [SponSkrub Options](README.md#sponskrub-sponsorblock-options) for details
* Added `--force-download-archive` (`--force-write-archive`) by [h-h-h-h](https://github.com/h-h-h-h)
* Added `--list-formats-as-table`, `--list-formats-old`
* **Negative Options:** Makes it possible to negate most boolean options by adding a `no-` to the switch. Useful when you want to reverse an option that is defined in a config file
    * Added `--no-ignore-dynamic-mpd`, `--no-allow-dynamic-mpd`, `--allow-dynamic-mpd`, `--youtube-include-hls-manifest`, `--no-youtube-include-hls-manifest`, `--no-youtube-skip-hls-manifest`, `--no-download`, `--no-download-archive`, `--resize-buffer`, `--part`, `--mtime`, `--no-keep-fragments`, `--no-cookies`, `--no-write-annotations`, `--no-write-info-json`, `--no-write-description`, `--no-write-thumbnail`, `--youtube-include-dash-manifest`, `--post-overwrites`, `--no-keep-video`, `--no-embed-subs`, `--no-embed-thumbnail`, `--no-add-metadata`, `--no-include-ads`, `--no-write-sub`, `--no-write-auto-sub`, `--no-playlist-reverse`, `--no-restrict-filenames`, `--youtube-include-dash-manifest`, `--no-format-sort-force`, `--flat-videos`, `--no-list-formats-as-table`, `--no-sponskrub`, `--no-sponskrub-cut`, `--no-sponskrub-force`
* Renamed: `--write-subs`, `--no-write-subs`, `--no-write-auto-subs`, `--write-auto-subs`. Note that these can still be used without the ending "s"
* Relaxed validation for format filters so that any arbitrary field can be used
* Fix for embedding thumbnail in mp3 by [pauldubois98](https://github.com/pauldubois98) ([ytdl-org/youtube-dl#21569](https://github.com/ytdl-org/youtube-dl/pull/21569))
* Make Twitch Video ID output from Playlist and VOD extractor same. This is only a temporary fix
* Merge youtube-dl: Upto [2021.01.03](https://github.com/ytdl-org/youtube-dl/commit/8e953dcbb10a1a42f4e12e4e132657cb0100a1f8) - See [blackjack4494/yt-dlc#280](https://github.com/blackjack4494/yt-dlc/pull/280) for details
    * Extractors [tiktok](https://github.com/ytdl-org/youtube-dl/commit/fb626c05867deab04425bad0c0b16b55473841a2) and [hotstar](https://github.com/ytdl-org/youtube-dl/commit/bb38a1215718cdf36d73ff0a7830a64cd9fa37cc) have not been merged
* Cleaned up the fork for public use

**Note**: All uncredited changes above this point are authored by [pukkandan](https://github.com/pukkandan)

### Unreleased changes in [blackjack4494/yt-dlc](https://github.com/blackjack4494/yt-dlc)
* Updated to youtube-dl release 2020.11.26 by [pukkandan](https://github.com/pukkandan)
* Youtube improvements by [pukkandan](https://github.com/pukkandan)
    * Implemented all Youtube Feeds (ytfav, ytwatchlater, ytsubs, ythistory, ytrec) and SearchURL
    * Fix some improper Youtube URLs
    * Redirect channel home to /video
    * Print youtube's warning message
    * Handle Multiple pages for feeds better
* [youtube] Fix ytsearch not returning results sometimes due to promoted content by [coletdjnz](https://github.com/coletdjnz)
* [youtube] Temporary fix for automatic captions - disable json3 by [blackjack4494](https://github.com/blackjack4494)
* Add --break-on-existing by [gergesh](https://github.com/gergesh)
* Pre-check video IDs in the archive before downloading by [pukkandan](https://github.com/pukkandan)
* [bitwave.tv] New extractor by [lorpus](https://github.com/lorpus)
* [Gedi] Add extractor by [nixxo](https://github.com/nixxo)
* [Rcs] Add new extractor by [nixxo](https://github.com/nixxo)
* [skyit] New skyitalia extractor by [nixxo](https://github.com/nixxo)
* [france.tv] Fix thumbnail URL by [renalid](https://github.com/renalid)
* [ina] support mobile links by [B0pol](https://github.com/B0pol)
* [instagram] Fix thumbnail extractor by [nao20010128nao](https://github.com/nao20010128nao)
* [SouthparkDe] Support for English URLs by [xypwn](https://github.com/xypwn)
* [spreaker] fix SpreakerShowIE test URL by [pukkandan](https://github.com/pukkandan)
* [Vlive] Fix playlist handling when downloading a channel by [kyuyeunk](https://github.com/kyuyeunk)
* [tmz] Fix extractor by [diegorodriguezv](https://github.com/diegorodriguezv)
* [ITV] BTCC URL update by [WolfganP](https://github.com/WolfganP)
* [generic] Detect embedded bitchute videos by [pukkandan](https://github.com/pukkandan)
* [generic] Extract embedded youtube and twitter videos by [diegorodriguezv](https://github.com/diegorodriguezv)
* [ffmpeg] Ensure all streams are copied by [pukkandan](https://github.com/pukkandan)
* [embedthumbnail] Fix for os.rename error by [pukkandan](https://github.com/pukkandan)
* make_win.bat: don't use UPX to pack vcruntime140.dll by [jbruchon](https://github.com/jbruchon)

### Changelog of [blackjack4494/yt-dlc](https://github.com/blackjack4494/yt-dlc) till release 2020.11.11-3

**Note**: This was constructed from the merge commit messages and may not be entirely accurate

* [bandcamp] fix failing test. remove subclass hack by [insaneracist](https://github.com/insaneracist)
* [bandcamp] restore album downloads by [insaneracist](https://github.com/insaneracist)
* [francetv] fix extractor by [Surkal](https://github.com/Surkal)
* [gdcvault] fix extractor by [blackjack4494](https://github.com/blackjack4494)
* [hotstar] Move to API v1 by [theincognito-inc](https://github.com/theincognito-inc)
* [hrfernsehen] add extractor by [blocktrron](https://github.com/blocktrron)
* [kakao] new apis by [blackjack4494](https://github.com/blackjack4494)
* [la7] fix missing protocol by [nixxo](https://github.com/nixxo)
* [mailru] removed escaped braces, use urljoin, added tests by [nixxo](https://github.com/nixxo)
* [MTV/Nick] universal mgid extractor + fix nick.de feed by [blackjack4494](https://github.com/blackjack4494)
* [mtv] Fix a missing match_id by [nixxo](https://github.com/nixxo)
* [Mtv] updated extractor logic & more by [blackjack4494](https://github.com/blackjack4494)
* [ndr] support Daserste ndr by [blackjack4494](https://github.com/blackjack4494)
* [Netzkino] Only use video id to find metadata by [TobiX](https://github.com/TobiX)
* [newgrounds] fix: video download by [insaneracist](https://github.com/insaneracist)
* [nitter] Add new extractor by [B0pol](https://github.com/B0pol)
* [soundcloud] Resolve audio/x-wav by [tfvlrue](https://github.com/tfvlrue)
* [soundcloud] sets pattern and tests by [blackjack4494](https://github.com/blackjack4494)
* [SouthparkDE/MTV] another mgid extraction (mtv_base) feed url updated by [blackjack4494](https://github.com/blackjack4494)
* [StoryFire] Add new extractor by [sgstair](https://github.com/sgstair)
* [twitch] by [geauxlo](https://github.com/geauxlo)
* [videa] Adapt to updates by [adrianheine](https://github.com/adrianheine)
* [Viki] subtitles, formats by [blackjack4494](https://github.com/blackjack4494)
* [vlive] fix extractor for revamped website by [exwm](https://github.com/exwm)
* [xtube] fix extractor by [insaneracist](https://github.com/insaneracist)
* [youtube] Convert subs when download is skipped by [blackjack4494](https://github.com/blackjack4494)
* [youtube] Fix age gate detection by [random-nick](https://github.com/random-nick)
* [youtube] fix yt-only playback when age restricted/gated - requires cookies by [blackjack4494](https://github.com/blackjack4494)
* [youtube] fix: extract artist metadata from ytInitialData by [insaneracist](https://github.com/insaneracist)
* [youtube] fix: extract mix playlist ids from ytInitialData by [insaneracist](https://github.com/insaneracist)
* [youtube] fix: mix playlist title by [insaneracist](https://github.com/insaneracist)
* [youtube] fix: Youtube Music playlists by [insaneracist](https://github.com/insaneracist)
* [Youtube] Fixed problem with new youtube player by [peet1993](https://github.com/peet1993)
* [zoom] Fix url parsing for url's containing /share/ and dots by [Romern](https://github.com/Romern)
* [zoom] new extractor by [insaneracist](https://github.com/insaneracist)
* abc by [adrianheine](https://github.com/adrianheine)
* Added Comcast_SSO fix by [merval](https://github.com/merval)
* Added DRM logic to brightcove by [merval](https://github.com/merval)
* Added regex for ABC.com site. by [kucksdorfs](https://github.com/kucksdorfs)
* alura by [hugohaa](https://github.com/hugohaa)
* Arbitrary merges by [fstirlitz](https://github.com/fstirlitz)
* ard.py_add_playlist_support by [martin54](https://github.com/martin54)
* Bugfix/youtube/chapters fix extractor by [gschizas](https://github.com/gschizas)
* bugfix_youtube_like_extraction by [RedpointsBots](https://github.com/RedpointsBots)
* Create build workflow by [blackjack4494](https://github.com/blackjack4494)
* deezer by [LucBerge](https://github.com/LucBerge)
* Detect embedded bitchute videos by [pukkandan](https://github.com/pukkandan)
* Don't install tests by [l29ah](https://github.com/l29ah)
* Don't try to embed/convert json subtitles generated by [youtube](https://github.com/youtube) livechat by [pukkandan](https://github.com/pukkandan)
* Doodstream by [sxvghd](https://github.com/sxvghd)
* duboku by [lkho](https://github.com/lkho)
* elonet by [tpikonen](https://github.com/tpikonen)
* ext/remuxe-video by [Zocker1999NET](https://github.com/Zocker1999NET)
* fall-back to the old way to fetch subtitles, if needed by [RobinD42](https://github.com/RobinD42)
* feature_subscriber_count by [RedpointsBots](https://github.com/RedpointsBots)
* Fix external downloader when there is no http_header by [pukkandan](https://github.com/pukkandan)
* Fix issue triggered by [tubeup](https://github.com/tubeup) by [nsapa](https://github.com/nsapa)
* Fix YoutubePlaylistsIE by [ZenulAbidin](https://github.com/ZenulAbidin)
* fix-mitele' by [DjMoren](https://github.com/DjMoren)
* fix/google-drive-cookie-issue by [legraphista](https://github.com/legraphista)
* fix_tiktok by [mervel-mervel](https://github.com/mervel-mervel)
* Fixed problem with JS player URL by [peet1993](https://github.com/peet1993)
* fixYTSearch by [xarantolus](https://github.com/xarantolus)
* FliegendeWurst-3sat-zdf-merger-bugfix-feature
* gilou-bandcamp_update
* implement ThisVid extractor by [rigstot](https://github.com/rigstot)
* JensTimmerman-patch-1 by [JensTimmerman](https://github.com/JensTimmerman)
* Keep download archive in memory for better performance by [jbruchon](https://github.com/jbruchon)
* la7-fix by [iamleot](https://github.com/iamleot)
* magenta by [adrianheine](https://github.com/adrianheine)
* Merge 26564 from [adrianheine](https://github.com/adrianheine)
* Merge code from [ddland](https://github.com/ddland)
* Merge code from [nixxo](https://github.com/nixxo)
* Merge code from [ssaqua](https://github.com/ssaqua)
* Merge code from [zubearc](https://github.com/zubearc)
* mkvthumbnail by [MrDoritos](https://github.com/MrDoritos)
* myvideo_ge by [fonkap](https://github.com/fonkap)
* naver by [SeonjaeHyeon](https://github.com/SeonjaeHyeon)
* ondemandkorea by [julien-hadleyjack](https://github.com/julien-hadleyjack)
* rai-update by [iamleot](https://github.com/iamleot)
* RFC: youtube: Polymer UI and JSON endpoints for playlists by [wlritchi](https://github.com/wlritchi)
* rutv by [adrianheine](https://github.com/adrianheine)
* Sc extractor web auth by [blackjack4494](https://github.com/blackjack4494)
* Switch from binary search tree to Python sets by [jbruchon](https://github.com/jbruchon)
* tiktok by [skyme5](https://github.com/skyme5)
* tvnow by [TinyToweringTree](https://github.com/TinyToweringTree)
* twitch-fix by [lel-amri](https://github.com/lel-amri)
* Twitter shortener by [blackjack4494](https://github.com/blackjack4494)
* Update README.md by [JensTimmerman](https://github.com/JensTimmerman)
* Update to reflect website changes. by [amigatomte](https://github.com/amigatomte)
* use webarchive to fix a dead link in README by [B0pol](https://github.com/B0pol)
* Viki the second by [blackjack4494](https://github.com/blackjack4494)
* wdr-subtitles by [mrtnmtth](https://github.com/mrtnmtth)
* Webpfix by [alexmerkel](https://github.com/alexmerkel)
* Youtube live chat by [siikamiika](https://github.com/siikamiika)

yt-dlp-2024.09.27/Collaborators.md

# Collaborators

This is a list of the collaborators of the project and their major contributions. See the [Changelog](Changelog.md) for more details.

You can also find lists of all [contributors of yt-dlp](CONTRIBUTORS) and [authors of youtube-dl](https://github.com/ytdl-org/youtube-dl/blob/master/AUTHORS)

## [pukkandan](https://github.com/pukkandan)

[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/pukkandan)
[![gh-sponsor](https://img.shields.io/badge/_-Github-white.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/pukkandan)

* Owner of the fork

## [shirt](https://github.com/shirt-dev)

[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/shirt)

* Multithreading (`-N`) and aria2c support for fragment downloads
* Support for media initialization and discontinuity in HLS
* The self-updater (`-U`)

## [coletdjnz](https://github.com/coletdjnz)

[![gh-sponsor](https://img.shields.io/badge/_-Github-white.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/coletdjnz)

* Improved plugin architecture
* Rewrote the networking infrastructure, implemented support for `requests`
* YouTube improvements including: age-gate bypass, private playlists, multiple-clients (to avoid throttling) and a lot of under-the-hood improvements
* Added support for new websites YoutubeWebArchive, MainStreaming, PRX, nzherald, Mediaklikk, StarTV etc
* Improved/fixed support for Patreon, panopto, gfycat, itv, pbs, SouthParkDE etc

## [Ashish0804](https://github.com/Ashish0804) <sub><sup>[Inactive]</sup></sub>

[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/ashish0804)

* Added support for new websites BiliIntl, DiscoveryPlusIndia, OlympicsReplay, PlanetMarathi, ShemarooMe, Utreon, Zee5 etc
* Added playlist/series downloads for Hotstar, ParamountPlus, Rumble, SonyLIV, Trovo, TubiTv, Voot etc
* Improved/fixed support for HiDive, HotStar, Hungama, LBRY, LinkedInLearning, Mxplayer, SonyLiv, TV2, Vimeo, VLive etc

## [bashonly](https://github.com/bashonly)

* `--update-to`, self-updater rewrite, automated/nightly/master releases
* `--cookies-from-browser` support for Firefox containers, external downloader cookie handling overhaul
* Added support for new websites like Dacast, Kick, NBCStations, Triller, VideoKen, Weverse, WrestleUniverse etc
* Improved/fixed support for Anvato, Brightcove, Reddit, SlidesLive, TikTok, Twitter, Vimeo etc

## [Grub4K](https://github.com/Grub4K)

[![gh-sponsor](https://img.shields.io/badge/_-Github-white.svg?logo=github&labelColor=555555&style=for-the-badge)](https://github.com/sponsors/Grub4K)
[![ko-fi](https://img.shields.io/badge/_-Ko--fi-red.svg?logo=kofi&labelColor=555555&style=for-the-badge)](https://ko-fi.com/Grub4K)

* `--update-to`, self-updater rewrite, automated/nightly/master releases
* Reworked internals like `traverse_obj`, various core refactors and bug fixes
* Implemented proper progress reporting for parallel downloads
* Improved/fixed/added Bundestag, crunchyroll, pr0gramm, Twitter, WrestleUniverse etc

## [sepro](https://github.com/seproDev)

* UX improvements: Warn when ffmpeg is missing, warn when double-clicking exe
* Code cleanup: Remove dead extractors, mark extractors as broken, enable/apply ruff rules
* Improved/fixed/added ArdMediathek, DRTV, Floatplane, MagentaMusik, Naver, Nebula, OnDemandKorea, Vbox7 etc

yt-dlp-2024.09.27/LICENSE

This is free and unencumbered software released into the public domain.

Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means.

In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>

yt-dlp-2024.09.27/Makefile

all: lazy-extractors yt-dlp doc pypi-files
clean: clean-test clean-dist
clean-all: clean clean-cache
completions: completion-bash completion-fish completion-zsh
doc: README.md CONTRIBUTING.md CONTRIBUTORS issuetemplates supportedsites
ot: offlinetest
tar: yt-dlp.tar.gz

# Keep this list in sync with pyproject.toml includes/artifacts
# intended use: when building a source distribution,
# make pypi-files && python3 -m build -sn .
pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
	completions yt-dlp.1 pyproject.toml setup.cfg devscripts/* test/*

.PHONY: all clean clean-all clean-test clean-dist clean-cache \
	completions completion-bash completion-fish completion-zsh \
	doc issuetemplates supportedsites ot offlinetest codetest test \
	tar pypi-files lazy-extractors install uninstall

clean-test:
	rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
	*.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \
	*.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.lrc *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 \
	*.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
clean-dist:
	rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
	yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS
clean-cache:
	find . \( \
		-type d -name ".*_cache" -o -type d -name __pycache__ -o -name "*.pyc" -o -name "*.class" \
	\) -prune -exec rm -rf {} \;

completion-bash: completions/bash/yt-dlp
completion-fish: completions/fish/yt-dlp.fish
completion-zsh: completions/zsh/_yt-dlp
lazy-extractors: yt_dlp/extractor/lazy_extractors.py

PREFIX ?= /usr/local
BINDIR ?= $(PREFIX)/bin
MANDIR ?= $(PREFIX)/man
SHAREDIR ?= $(PREFIX)/share
PYTHON ?= /usr/bin/env python3
GNUTAR ?= tar

# set markdown input format to "markdown-smart" for pandoc version 2+ and to "markdown" for pandoc prior to version 2
PANDOC_VERSION_CMD = pandoc -v 2>/dev/null | head -n1 | cut -d' ' -f2 | head -c1
PANDOC_VERSION != $(PANDOC_VERSION_CMD)
PANDOC_VERSION ?= $(shell $(PANDOC_VERSION_CMD))
MARKDOWN_CMD = if [ "$(PANDOC_VERSION)" = "1" -o "$(PANDOC_VERSION)" = "0" ]; then echo markdown; else echo markdown-smart; fi
MARKDOWN != $(MARKDOWN_CMD)
MARKDOWN ?= $(shell $(MARKDOWN_CMD))

install: lazy-extractors yt-dlp yt-dlp.1 completions
	mkdir -p $(DESTDIR)$(BINDIR)
	install -m755 yt-dlp $(DESTDIR)$(BINDIR)/yt-dlp
	mkdir -p $(DESTDIR)$(MANDIR)/man1
	install -m644 yt-dlp.1 $(DESTDIR)$(MANDIR)/man1/yt-dlp.1
	mkdir -p $(DESTDIR)$(SHAREDIR)/bash-completion/completions
	install -m644 completions/bash/yt-dlp $(DESTDIR)$(SHAREDIR)/bash-completion/completions/yt-dlp
	mkdir -p $(DESTDIR)$(SHAREDIR)/zsh/site-functions
	install -m644 completions/zsh/_yt-dlp $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_yt-dlp
	mkdir -p $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d
	install -m644 completions/fish/yt-dlp.fish $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish

uninstall:
	rm -f $(DESTDIR)$(BINDIR)/yt-dlp
	rm -f $(DESTDIR)$(MANDIR)/man1/yt-dlp.1
	rm -f $(DESTDIR)$(SHAREDIR)/bash-completion/completions/yt-dlp
	rm -f $(DESTDIR)$(SHAREDIR)/zsh/site-functions/_yt-dlp
	rm -f $(DESTDIR)$(SHAREDIR)/fish/vendor_completions.d/yt-dlp.fish

codetest:
	ruff check .
	autopep8 --diff .
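# Note (editorial, as Makefile comments): the test targets below assume a
# development checkout with `pytest`, `ruff` and `autopep8` available on PATH.
# `make codetest` (above) only runs the linters; `make test` runs the full
# pytest suite with warnings treated as errors and then the linters; and
# `make offlinetest` skips tests marked "download", so it can run without
# network access.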
test:
	$(PYTHON) -m pytest -Werror
	$(MAKE) codetest

offlinetest: codetest
	$(PYTHON) -m pytest -Werror -m "not download"

CODE_FOLDERS_CMD = find yt_dlp -type f -name '__init__.py' | sed 's,/__init__.py,,' | grep -v '/__' | sort
CODE_FOLDERS != $(CODE_FOLDERS_CMD)
CODE_FOLDERS ?= $(shell $(CODE_FOLDERS_CMD))
CODE_FILES_CMD = for f in $(CODE_FOLDERS) ; do echo "$$f" | sed 's,$$,/*.py,' ; done
CODE_FILES != $(CODE_FILES_CMD)
CODE_FILES ?= $(shell $(CODE_FILES_CMD))
yt-dlp: $(CODE_FILES)
	mkdir -p zip
	for d in $(CODE_FOLDERS) ; do \
		mkdir -p zip/$$d ;\
		cp -pPR $$d/*.py zip/$$d/ ;\
	done
	(cd zip && touch -t 200001010101 $(CODE_FILES))
	mv zip/yt_dlp/__main__.py zip/
	(cd zip && zip -q ../yt-dlp $(CODE_FILES) __main__.py)
	rm -rf zip
	echo '#!$(PYTHON)' > yt-dlp
	cat yt-dlp.zip >> yt-dlp
	rm yt-dlp.zip
	chmod a+x yt-dlp

README.md: $(CODE_FILES) devscripts/make_readme.py
	COLUMNS=80 $(PYTHON) yt_dlp/__main__.py --ignore-config --help | $(PYTHON) devscripts/make_readme.py

CONTRIBUTING.md: README.md devscripts/make_contributing.py
	$(PYTHON) devscripts/make_contributing.py README.md CONTRIBUTING.md

issuetemplates: devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml yt_dlp/version.py
	$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/1_broken_site.yml .github/ISSUE_TEMPLATE/1_broken_site.yml
	$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/2_site_support_request.yml .github/ISSUE_TEMPLATE/2_site_support_request.yml
	$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/3_site_feature_request.yml .github/ISSUE_TEMPLATE/3_site_feature_request.yml
	$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/4_bug_report.yml .github/ISSUE_TEMPLATE/4_bug_report.yml
	$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/5_feature_request.yml .github/ISSUE_TEMPLATE/5_feature_request.yml
	$(PYTHON) devscripts/make_issue_template.py .github/ISSUE_TEMPLATE_tmpl/6_question.yml .github/ISSUE_TEMPLATE/6_question.yml

supportedsites:
	$(PYTHON) devscripts/make_supportedsites.py supportedsites.md

README.txt: README.md
	pandoc -f $(MARKDOWN) -t plain README.md -o README.txt

yt-dlp.1: README.md devscripts/prepare_manpage.py
	$(PYTHON) devscripts/prepare_manpage.py yt-dlp.1.temp.md
	pandoc -s -f $(MARKDOWN) -t man yt-dlp.1.temp.md -o yt-dlp.1
	rm -f yt-dlp.1.temp.md

completions/bash/yt-dlp: $(CODE_FILES) devscripts/bash-completion.in
	mkdir -p completions/bash
	$(PYTHON) devscripts/bash-completion.py

completions/zsh/_yt-dlp: $(CODE_FILES) devscripts/zsh-completion.in
	mkdir -p completions/zsh
	$(PYTHON) devscripts/zsh-completion.py

completions/fish/yt-dlp.fish: $(CODE_FILES) devscripts/fish-completion.in
	mkdir -p completions/fish
	$(PYTHON) devscripts/fish-completion.py

_EXTRACTOR_FILES_CMD = find yt_dlp/extractor -name '*.py' -and -not -name 'lazy_extractors.py'
_EXTRACTOR_FILES != $(_EXTRACTOR_FILES_CMD)
_EXTRACTOR_FILES ?= $(shell $(_EXTRACTOR_FILES_CMD))
yt_dlp/extractor/lazy_extractors.py: devscripts/make_lazy_extractors.py devscripts/lazy_load_template.py $(_EXTRACTOR_FILES)
	$(PYTHON) devscripts/make_lazy_extractors.py $@

yt-dlp.tar.gz: all
	@$(GNUTAR) -czf yt-dlp.tar.gz --transform "s|^|yt-dlp/|" --owner 0 --group 0 \
		--exclude '*.DS_Store' \
		--exclude '*.kate-swp' \
		--exclude '*.pyc' \
		--exclude '*.pyo' \
		--exclude '*~' \
		--exclude '__pycache__' \
		--exclude '.*_cache' \
		--exclude '.git' \
		-- \
		README.md supportedsites.md Changelog.md LICENSE \
		CONTRIBUTING.md Collaborators.md CONTRIBUTORS AUTHORS \
		Makefile yt-dlp.1 README.txt completions .gitignore \
		setup.cfg yt-dlp yt_dlp pyproject.toml devscripts test

AUTHORS: Changelog.md
	@if [ -d '.git' ] && command -v git > /dev/null ; then \
		echo 'Generating $@ from git commit history' ; \
		git shortlog -s -n HEAD | cut -f2 | sort > $@ ; \
	fi

CONTRIBUTORS: Changelog.md
	@if [ -d '.git' ] && command -v git > /dev/null ; then \
		echo 'Updating $@ from git commit history' ; \
		$(PYTHON) devscripts/make_changelog.py -v -c > /dev/null ; \
	fi

yt-dlp-2024.09.27/README.md

<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
<div align="center">

[![YT-DLP](https://raw.githubusercontent.com/yt-dlp/yt-dlp/master/.github/banner.svg)](#readme)

[![Release version](https://img.shields.io/github/v/release/yt-dlp/yt-dlp?color=brightgreen&label=Download&style=for-the-badge)](#installation "Installation")
[![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp "PyPi")
[![Donate](https://img.shields.io/badge/_-Donate-red.svg?logo=githubsponsors&labelColor=555555&style=for-the-badge)](Collaborators.md#collaborators "Donate")
[![Matrix](https://img.shields.io/matrix/yt-dlp:matrix.org?color=brightgreen&labelColor=555555&label=&logo=element&style=for-the-badge)](https://matrix.to/#/#yt-dlp:matrix.org "Matrix")
[![Discord](https://img.shields.io/discord/807245652072857610?color=blue&labelColor=555555&label=&logo=discord&style=for-the-badge)](https://discord.gg/H5MNcFW63r "Discord")
[![Supported Sites](https://img.shields.io/badge/-Supported_Sites-brightgreen.svg?style=for-the-badge)](supportedsites.md "Supported Sites")
[![License: Unlicense](https://img.shields.io/badge/-Unlicense-blue.svg?style=for-the-badge)](LICENSE "License")
[![CI Status](https://img.shields.io/github/actions/workflow/status/yt-dlp/yt-dlp/core.yml?branch=master&label=Tests&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/actions "CI Status")
[![Commits](https://img.shields.io/github/commit-activity/m/yt-dlp/yt-dlp?label=commits&style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/commits "Commit History")
[![Last Commit](https://img.shields.io/github/last-commit/yt-dlp/yt-dlp/master?label=&style=for-the-badge&display_timestamp=committer)](https://github.com/yt-dlp/yt-dlp/pulse/monthly "Last activity")

</div>
<!-- MANPAGE: END EXCLUDED SECTION -->

yt-dlp is a feature-rich command-line audio/video downloader with support for [thousands of sites](supportedsites.md). The project is a fork of [youtube-dl](https://github.com/ytdl-org/youtube-dl) based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc).
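As a quick orientation before the detailed reference below, here is a minimal sketch of everyday usage (the URL is a placeholder; `-f` format selection and `-o` output templates are documented in the sections linked in the table of contents):

```
# Download a video/playlist with the default settings
yt-dlp "URL"

# Prefer an mp4 video + m4a audio pair, falling back to the best mp4
yt-dlp -f "bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4]" "URL"

# Name the downloaded file after the video title
yt-dlp -o "%(title)s.%(ext)s" "URL"
```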
The project is a fork of [youtube-dl](https://github.com/ytdl-org/youtube-dl) based on the now inactive [youtube-dlc](https://github.com/blackjack4494/yt-dlc). <!-- MANPAGE: MOVE "USAGE AND OPTIONS" SECTION HERE --> <!-- MANPAGE: BEGIN EXCLUDED SECTION --> * [INSTALLATION](#installation) * [Detailed instructions](https://github.com/yt-dlp/yt-dlp/wiki/Installation) * [Release Files](#release-files) * [Update](#update) * [Dependencies](#dependencies) * [Compile](#compile) * [USAGE AND OPTIONS](#usage-and-options) * [General Options](#general-options) * [Network Options](#network-options) * [Geo-restriction](#geo-restriction) * [Video Selection](#video-selection) * [Download Options](#download-options) * [Filesystem Options](#filesystem-options) * [Thumbnail Options](#thumbnail-options) * [Internet Shortcut Options](#internet-shortcut-options) * [Verbosity and Simulation Options](#verbosity-and-simulation-options) * [Workarounds](#workarounds) * [Video Format Options](#video-format-options) * [Subtitle Options](#subtitle-options) * [Authentication Options](#authentication-options) * [Post-processing Options](#post-processing-options) * [SponsorBlock Options](#sponsorblock-options) * [Extractor Options](#extractor-options) * [CONFIGURATION](#configuration) * [Configuration file encoding](#configuration-file-encoding) * [Authentication with netrc](#authentication-with-netrc) * [Notes about environment variables](#notes-about-environment-variables) * [OUTPUT TEMPLATE](#output-template) * [Output template examples](#output-template-examples) * [FORMAT SELECTION](#format-selection) * [Filtering Formats](#filtering-formats) * [Sorting Formats](#sorting-formats) * [Format Selection examples](#format-selection-examples) * [MODIFYING METADATA](#modifying-metadata) * [Modifying metadata examples](#modifying-metadata-examples) * [EXTRACTOR ARGUMENTS](#extractor-arguments) * [PLUGINS](#plugins) * [Installing Plugins](#installing-plugins) * [Developing Plugins](#developing-plugins) * [EMBEDDING YT-DLP](#embedding-yt-dlp) * [Embedding examples](#embedding-examples) * [CHANGES FROM YOUTUBE-DL](#changes-from-youtube-dl) * [New features](#new-features) * [Differences in default behavior](#differences-in-default-behavior) * [Deprecated options](#deprecated-options) * [CONTRIBUTING](CONTRIBUTING.md#contributing-to-yt-dlp) * [Opening an Issue](CONTRIBUTING.md#opening-an-issue) * [Developer Instructions](CONTRIBUTING.md#developer-instructions) * [WIKI](https://github.com/yt-dlp/yt-dlp/wiki) * [FAQ](https://github.com/yt-dlp/yt-dlp/wiki/FAQ) <!-- MANPAGE: END EXCLUDED SECTION --> # INSTALLATION <!-- MANPAGE: BEGIN EXCLUDED SECTION --> [![Windows](https://img.shields.io/badge/-Windows_x64-blue.svg?style=for-the-badge&logo=windows)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe) [![Unix](https://img.shields.io/badge/-Linux/BSD-red.svg?style=for-the-badge&logo=linux)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp) [![MacOS](https://img.shields.io/badge/-MacOS-lightblue.svg?style=for-the-badge&logo=apple)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos) [![PyPi](https://img.shields.io/badge/-PyPi-blue.svg?logo=pypi&labelColor=555555&style=for-the-badge)](https://pypi.org/project/yt-dlp) [![Source Tarball](https://img.shields.io/badge/-Source_tar-green.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz) [![Other variants](https://img.shields.io/badge/-Other-grey.svg?style=for-the-badge)](#release-files) 
[![All versions](https://img.shields.io/badge/-All_Versions-lightgrey.svg?style=for-the-badge)](https://github.com/yt-dlp/yt-dlp/releases) <!-- MANPAGE: END EXCLUDED SECTION --> You can install yt-dlp using [the binaries](#release-files), [pip](https://pypi.org/project/yt-dlp) or one using a third-party package manager. See [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation) for detailed instructions <!-- MANPAGE: BEGIN EXCLUDED SECTION --> ## RELEASE FILES #### Recommended File|Description :---|:--- [yt-dlp](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp)|Platform-independent [zipimport](https://docs.python.org/3/library/zipimport.html) binary. Needs Python (recommended for **Linux/BSD**) [yt-dlp.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.exe)|Windows (Win7 SP1+) standalone x64 binary (recommended for **Windows**) [yt-dlp_macos](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos)|Universal MacOS (10.15+) standalone executable (recommended for **MacOS**) #### Alternatives File|Description :---|:--- [yt-dlp_x86.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_x86.exe)|Windows (Win7 SP1+) standalone x86 (32-bit) binary [yt-dlp_min.exe](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_min.exe)|Windows (Win7 SP1+) standalone x64 binary built with `py2exe`<br/> ([Not recommended](#standalone-py2exe-builds-windows)) [yt-dlp_linux](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux)|Linux standalone x64 binary [yt-dlp_linux_armv7l](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_armv7l)|Linux standalone armv7l (32-bit) binary [yt-dlp_linux_aarch64](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_linux_aarch64)|Linux standalone aarch64 (64-bit) binary [yt-dlp_win.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_win.zip)|Unpackaged Windows executable (no auto-update) [yt-dlp_macos.zip](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos.zip)|Unpackaged MacOS (10.15+) executable (no auto-update) [yt-dlp_macos_legacy](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp_macos_legacy)|MacOS (10.9+) standalone x64 executable #### Misc File|Description :---|:--- [yt-dlp.tar.gz](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)|Source tarball [SHA2-512SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS)|GNU-style SHA512 sums [SHA2-512SUMS.sig](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-512SUMS.sig)|GPG signature file for SHA512 sums [SHA2-256SUMS](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS)|GNU-style SHA256 sums [SHA2-256SUMS.sig](https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS.sig)|GPG signature file for SHA256 sums The public key that can be used to verify the GPG signatures is [available here](https://github.com/yt-dlp/yt-dlp/blob/master/public.key) Example usage: ``` curl -L https://github.com/yt-dlp/yt-dlp/raw/master/public.key | gpg --import gpg --verify SHA2-256SUMS.sig SHA2-256SUMS gpg --verify SHA2-512SUMS.sig SHA2-512SUMS ``` <!-- MANPAGE: END EXCLUDED SECTION --> **Note**: The manpages, shell completion (autocomplete) files etc. 
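Besides verifying the GPG signatures, the sums files can also be used to check the integrity of a downloaded release asset. A minimal sketch, assuming GNU coreutils and that the asset was downloaded into the current directory:

```
curl -LO https://github.com/yt-dlp/yt-dlp/releases/latest/download/SHA2-256SUMS
sha256sum --check --ignore-missing SHA2-256SUMS
```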
**Note**: The manpages, shell completion (autocomplete) files etc. are available inside the [source tarball](https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp.tar.gz)

## UPDATE
You can use `yt-dlp -U` to update if you are using the [release binaries](#release-files)

If you [installed with pip](https://github.com/yt-dlp/yt-dlp/wiki/Installation#with-pip), simply re-run the same command that was used to install the program

For other third-party package managers, see [the wiki](https://github.com/yt-dlp/yt-dlp/wiki/Installation#third-party-package-managers) or refer to their documentation

<a id="update-channels"></a>

There are currently three release channels for binaries: `stable`, `nightly` and `master`.

* `stable` is the default channel, and many of its changes have been tested by users of the `nightly` and `master` channels.
* The `nightly` channel has releases scheduled to build every day around midnight UTC, for a snapshot of the project's new patches and changes. This is the **recommended channel for regular users** of yt-dlp. The `nightly` releases are available from [yt-dlp/yt-dlp-nightly-builds](https://github.com/yt-dlp/yt-dlp-nightly-builds/releases) or as development releases of the `yt-dlp` PyPI package (which can be installed with pip's `--pre` flag).
* The `master` channel features releases that are built after each push to the master branch, and these will have the very latest fixes and additions, but may also be more prone to regressions. They are available from [yt-dlp/yt-dlp-master-builds](https://github.com/yt-dlp/yt-dlp-master-builds/releases).

When using `--update`/`-U`, a release binary will only update to its current channel. `--update-to CHANNEL` can be used to switch to a different channel when a newer version is available. `--update-to [CHANNEL@]TAG` can also be used to upgrade or downgrade to specific tags from a channel.

You may also use `--update-to <repository>` (`<owner>/<repository>`) to update to a channel on a completely different repository. Be careful with what repository you are updating to though, there is no verification done for binaries from different repositories.

Example usage:

* `yt-dlp --update-to master` switch to the `master` channel and update to its latest release
* `yt-dlp --update-to stable@2023.07.06` upgrade/downgrade to the `stable` channel release tagged `2023.07.06`
* `yt-dlp --update-to 2023.10.07` upgrade/downgrade to tag `2023.10.07` if it exists on the current channel
* `yt-dlp --update-to example/yt-dlp@2023.09.24` upgrade/downgrade to the release from the `example/yt-dlp` repository, tag `2023.09.24`

**Important**: Any user experiencing an issue with the `stable` release should install or update to the `nightly` release before submitting a bug report:
```
# To update to nightly from stable executable/binary:
yt-dlp --update-to nightly

# To install nightly with pip:
python3 -m pip install -U --pre "yt-dlp[default]"
```

## DEPENDENCIES
Python versions 3.8+ (CPython and PyPy) are supported. Other versions and implementations may or may not work correctly.

<!-- Python 3.5+ uses VC++14 and it is already embedded in the binary created
<!x-- https://www.microsoft.com/en-us/download/details.aspx?id=26999 --x>
On windows, [Microsoft Visual C++ 2010 SP1 Redistributable Package (x86)](https://download.microsoft.com/download/1/6/5/165255E7-1014-4D0A-B094-B6A430A6BFFC/vcredist_x86.exe) is also necessary to run yt-dlp. You probably already have this, but if the executable throws an error due to missing `MSVCR100.dll` you need to install it manually.
While all the other dependencies are optional, `ffmpeg` and `ffprobe` are highly recommended

### Strongly recommended

* [**ffmpeg** and **ffprobe**](https://www.ffmpeg.org) - Required for [merging separate video and audio files](#format-selection), as well as for various [post-processing](#post-processing-options) tasks. License [depends on the build](https://www.ffmpeg.org/legal.html)

    There are bugs in ffmpeg that cause various issues when used alongside yt-dlp. Since ffmpeg is such an important dependency, we provide [custom builds](https://github.com/yt-dlp/FFmpeg-Builds#ffmpeg-static-auto-builds) with patches for some of these issues at [yt-dlp/FFmpeg-Builds](https://github.com/yt-dlp/FFmpeg-Builds). See [the readme](https://github.com/yt-dlp/FFmpeg-Builds#patches-applied) for details on the specific issues solved by these builds

    **Important**: What you need is the ffmpeg *binary*, **NOT** [the Python package of the same name](https://pypi.org/project/ffmpeg)

### Networking

* [**certifi**](https://github.com/certifi/python-certifi)\* - Provides Mozilla's root certificate bundle. Licensed under [MPLv2](https://github.com/certifi/python-certifi/blob/master/LICENSE)
* [**brotli**](https://github.com/google/brotli)\* or [**brotlicffi**](https://github.com/python-hyper/brotlicffi) - [Brotli](https://en.wikipedia.org/wiki/Brotli) content encoding support. Both licensed under MIT <sup>[1](https://github.com/google/brotli/blob/master/LICENSE) [2](https://github.com/python-hyper/brotlicffi/blob/master/LICENSE) </sup>
* [**websockets**](https://github.com/aaugustin/websockets)\* - For downloading over websocket. Licensed under [BSD-3-Clause](https://github.com/aaugustin/websockets/blob/main/LICENSE)
* [**requests**](https://github.com/psf/requests)\* - HTTP library. For HTTPS proxy and persistent connections support. Licensed under [Apache-2.0](https://github.com/psf/requests/blob/main/LICENSE)

#### Impersonation

The following provide support for impersonating browser requests. This may be required for some sites that employ TLS fingerprinting.

* [**curl_cffi**](https://github.com/lexiforest/curl_cffi) (recommended) - Python binding for [curl-impersonate](https://github.com/lexiforest/curl-impersonate). Provides impersonation targets for Chrome, Edge and Safari. Licensed under [MIT](https://github.com/lexiforest/curl_cffi/blob/main/LICENSE)
    * Can be installed with the `curl-cffi` group, e.g. `pip install "yt-dlp[default,curl-cffi]"`
    * Currently included in `yt-dlp.exe`, `yt-dlp_linux` and `yt-dlp_macos` builds
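As a minimal sketch of putting this together (the URL is illustrative; `--impersonate` and `--list-impersonate-targets` are described under Network Options below):

```
# Install yt-dlp with the curl-cffi group
python3 -m pip install -U "yt-dlp[default,curl-cffi]"

# List the available impersonation targets, then request with one of them
yt-dlp --list-impersonate-targets
yt-dlp --impersonate chrome "https://example.com/video"
```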
### Metadata

* [**mutagen**](https://github.com/quodlibet/mutagen)\* - For `--embed-thumbnail` in certain formats. Licensed under [GPLv2+](https://github.com/quodlibet/mutagen/blob/master/COPYING)
* [**AtomicParsley**](https://github.com/wez/atomicparsley) - For `--embed-thumbnail` in `mp4`/`m4a` files when `mutagen`/`ffmpeg` cannot. Licensed under [GPLv2+](https://github.com/wez/atomicparsley/blob/master/COPYING)
* [**xattr**](https://github.com/xattr/xattr), [**pyxattr**](https://github.com/iustin/pyxattr) or [**setfattr**](http://savannah.nongnu.org/projects/attr) - For writing xattr metadata (`--xattrs`) on **Mac** and **BSD**. Licensed under [MIT](https://github.com/xattr/xattr/blob/master/LICENSE.txt), [LGPL2.1](https://github.com/iustin/pyxattr/blob/master/COPYING) and [GPLv2+](http://git.savannah.nongnu.org/cgit/attr.git/tree/doc/COPYING) respectively

### Misc

* [**pycryptodomex**](https://github.com/Legrandin/pycryptodome)\* - For decrypting AES-128 HLS streams and various other data. Licensed under [BSD-2-Clause](https://github.com/Legrandin/pycryptodome/blob/master/LICENSE.rst)
* [**phantomjs**](https://github.com/ariya/phantomjs) - Used in extractors where JavaScript needs to be run. Licensed under [BSD-3-Clause](https://github.com/ariya/phantomjs/blob/master/LICENSE.BSD)
* [**secretstorage**](https://github.com/mitya57/secretstorage)\* - For `--cookies-from-browser` to access the **Gnome** keyring while decrypting cookies of **Chromium**-based browsers on **Linux**. Licensed under [BSD-3-Clause](https://github.com/mitya57/secretstorage/blob/master/LICENSE)
* Any external downloader that you want to use with `--downloader`

### Deprecated

* [**avconv** and **avprobe**](https://www.libav.org) - Now **deprecated** alternative to ffmpeg. License [depends on the build](https://libav.org/legal)
* [**sponskrub**](https://github.com/faissaloo/SponSkrub) - For using the now **deprecated** [sponskrub options](#sponskrub-options). Licensed under [GPLv3+](https://github.com/faissaloo/SponSkrub/blob/master/LICENCE.md)
* [**rtmpdump**](http://rtmpdump.mplayerhq.hu) - For downloading `rtmp` streams. ffmpeg can be used instead with `--downloader ffmpeg`. Licensed under [GPLv2+](http://rtmpdump.mplayerhq.hu)
* [**mplayer**](http://mplayerhq.hu/design7/info.html) or [**mpv**](https://mpv.io) - For downloading `rtsp`/`mms` streams. ffmpeg can be used instead with `--downloader ffmpeg`. Licensed under [GPLv2+](https://github.com/mpv-player/mpv/blob/master/Copyright)

To use or redistribute the dependencies, you must agree to their respective licensing terms.

The standalone release binaries are built with the Python interpreter and the packages marked with **\*** included.

If you do not have the necessary dependencies for a task you are attempting, yt-dlp will warn you. All the currently available dependencies are visible at the top of the `--verbose` output

## COMPILE

### Standalone PyInstaller Builds

To build the standalone executable, you must have Python and `pyinstaller` (plus any of yt-dlp's [optional dependencies](#dependencies) if needed). The executable will be built for the same CPU architecture as the Python used.

You can run the following commands:

```
python3 devscripts/install_deps.py --include pyinstaller
python3 devscripts/make_lazy_extractors.py
python3 -m bundle.pyinstaller
```

On some systems, you may need to use `py` or `python` instead of `python3`.

`python -m bundle.pyinstaller` accepts any arguments that can be passed to `pyinstaller`, such as `--onefile/-F` or `--onedir/-D`, which are further [documented here](https://pyinstaller.org/en/stable/usage.html#what-to-generate).

**Note**: PyInstaller versions below 4.4 [do not support](https://github.com/pyinstaller/pyinstaller#requirements-and-tested-platforms) Python installed from the Windows store without using a virtual environment.

**Important**: Running `pyinstaller` directly **instead of** using `python -m bundle.pyinstaller` is **not** officially supported. This may or may not work correctly.

### Platform-independent Binary (UNIX)

You will need the build tools `python` (3.8+), `zip`, `make` (GNU), `pandoc`\* and `pytest`\*.
After installing these, simply run `make`.

You can also run `make yt-dlp` instead to compile only the binary without updating any of the additional files. (The build tools marked with **\*** are not needed for this)

### Standalone Py2Exe Builds (Windows)

While we provide the option to build with [py2exe](https://www.py2exe.org), it is recommended to build [using PyInstaller](#standalone-pyinstaller-builds) instead since the py2exe builds **cannot contain `pycryptodomex`/`certifi`/`requests` and need VC++14** on the target computer to run.

If you wish to build it anyway, install Python (if it is not already installed), then run the following commands:

```
py devscripts/install_deps.py --include py2exe
py devscripts/make_lazy_extractors.py
py -m bundle.py2exe
```

### Related scripts

* **`devscripts/install_deps.py`** - Install dependencies for yt-dlp.
* **`devscripts/update-version.py`** - Update the version number based on the current date.
* **`devscripts/set-variant.py`** - Set the build variant of the executable.
* **`devscripts/make_changelog.py`** - Create a markdown changelog using short commit messages and update `CONTRIBUTORS` file.
* **`devscripts/make_lazy_extractors.py`** - Create lazy extractors. Running this before building the binaries (any variant) will improve their startup performance. Set the environment variable `YTDLP_NO_LAZY_EXTRACTORS=1` if you wish to forcefully disable lazy extractor loading.

Note: See their `--help` for more info.

### Forking the project

If you fork the project on GitHub, you can run your fork's [build workflow](.github/workflows/build.yml) to automatically build the selected version(s) as artifacts. Alternatively, you can run the [release workflow](.github/workflows/release.yml) or enable the [nightly workflow](.github/workflows/release-nightly.yml) to create full (pre-)releases.

# USAGE AND OPTIONS

<!-- MANPAGE: BEGIN EXCLUDED SECTION -->

    yt-dlp [OPTIONS] [--] URL [URL...]

`Ctrl+F` is your friend :D
<!-- MANPAGE: END EXCLUDED SECTION -->
<!-- Auto generated -->

## General Options:
    -h, --help  Print this help text and exit
    --version  Print program version and exit
    -U, --update  Update this program to the latest version
    --no-update  Do not check for updates (default)
    --update-to [CHANNEL]@[TAG]  Upgrade/downgrade to a specific version. CHANNEL can be a repository as well. CHANNEL and TAG default to "stable" and "latest" respectively if omitted; See "UPDATE" for details. Supported channels: stable, nightly, master
    -i, --ignore-errors  Ignore download and postprocessing errors. The download will be considered successful even if the postprocessing fails
    --no-abort-on-error  Continue with next video on download errors; e.g. to skip unavailable videos in a playlist (default)
    --abort-on-error  Abort downloading of further videos if an error occurs (Alias: --no-ignore-errors)
    --dump-user-agent  Display the current user-agent and exit
    --list-extractors  List all supported extractors and exit
    --extractor-descriptions  Output descriptions of all supported extractors and exit
    --use-extractors NAMES  Extractor names to use separated by commas. You can also use regexes, "all", "default" and "end" (end URL matching); e.g. --ies "holodex.*,end,youtube". Prefix the name with a "-" to exclude it, e.g. --ies default,-generic. Use --list-extractors for a list of extractor names. (Alias: --ies)
    --default-search PREFIX  Use this prefix for unqualified URLs. E.g. "gvsearch2:python" downloads two videos from google videos for the search term "python". Use the value "auto" to let yt-dlp guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching
Use the value "auto" to let yt-dlp guess ("auto_warning" to emit a warning when guessing). "error" just throws an error. The default value "fixup_error" repairs broken URLs, but emits an error if this is not possible instead of searching --ignore-config Don't load any more configuration files except those given to --config-locations. For backward compatibility, if this option is found inside the system configuration file, the user configuration is not loaded. (Alias: --no-config) --no-config-locations Do not load any custom configuration files (default). When given inside a configuration file, ignore all previous --config-locations defined in the current file --config-locations PATH Location of the main configuration file; either the path to the config or its containing directory ("-" for stdin). Can be used multiple times and inside other configuration files --flat-playlist Do not extract the videos of a playlist, only list them --no-flat-playlist Fully extract the videos of a playlist (default) --live-from-start Download livestreams from the start. Currently only supported for YouTube (Experimental) --no-live-from-start Download livestreams from the current time (default) --wait-for-video MIN[-MAX] Wait for scheduled streams to become available. Pass the minimum number of seconds (or range) to wait between retries --no-wait-for-video Do not wait for scheduled streams (default) --mark-watched Mark videos watched (even with --simulate) --no-mark-watched Do not mark videos watched (default) --color [STREAM:]POLICY Whether to emit color codes in output, optionally prefixed by the STREAM (stdout or stderr) to apply the setting to. Can be one of "always", "auto" (default), "never", or "no_color" (use non color terminal sequences). Use "auto-tty" or "no_color-tty" to decide based on terminal support only. Can be used multiple times --compat-options OPTS Options that can help keep compatibility with youtube-dl or youtube-dlc configurations by reverting some of the changes made in yt-dlp. See "Differences in default behavior" for details --alias ALIASES OPTIONS Create aliases for an option string. Unless an alias starts with a dash "-", it is prefixed with "--". Arguments are parsed according to the Python string formatting mini-language. E.g. --alias get-audio,-X "-S=aext:{0},abr -x --audio-format {0}" creates options "--get-audio" and "-X" that takes an argument (ARG0) and expands to "-S=aext:ARG0,abr -x --audio-format ARG0". All defined aliases are listed in the --help output. Alias options can trigger more aliases; so be careful to avoid defining recursive options. As a safety measure, each alias may be triggered a maximum of 100 times. This option can be used multiple times ## Network Options: --proxy URL Use the specified HTTP/HTTPS/SOCKS proxy. To enable SOCKS proxy, specify a proper scheme, e.g. socks5://user:pass@127.0.0.1:1080/. Pass in an empty string (--proxy "") for direct connection --socket-timeout SECONDS Time to wait before giving up, in seconds --source-address IP Client-side IP address to bind to --impersonate CLIENT[:OS] Client to impersonate for requests. E.g. chrome, chrome-110, chrome:windows-10. Pass --impersonate="" to impersonate any client. Note that forcing impersonation for all requests may have a detrimental impact on download speed and stability --list-impersonate-targets List available clients to impersonate. -4, --force-ipv4 Make all connections via IPv4 -6, --force-ipv6 Make all connections via IPv6 --enable-file-urls Enable file:// URLs. 
## Geo-restriction:
    --geo-verification-proxy URL  Use this proxy to verify the IP address for some geo-restricted sites. The default proxy specified by --proxy (or none, if the option is not present) is used for the actual downloading
    --xff VALUE  How to fake the X-Forwarded-For HTTP header to try bypassing geographic restriction. One of "default" (only when known to be useful), "never", an IP block in CIDR notation, or a two-letter ISO 3166-1 alpha-2 country code

## Video Selection:
    -I, --playlist-items ITEM_SPEC  Comma separated playlist_index of the items to download. You can specify a range using "[START]:[STOP][:STEP]". For backward compatibility, START-STOP is also supported. Use negative indices to count from the right and negative STEP to download in reverse order. E.g. "-I 1:3,7,-5::2" used on a playlist of size 15 will download the items at index 1,2,3,7,11,13,15
    --min-filesize SIZE  Abort download if filesize is smaller than SIZE, e.g. 50k or 44.6M
    --max-filesize SIZE  Abort download if filesize is larger than SIZE, e.g. 50k or 44.6M
    --date DATE  Download only videos uploaded on this date. The date can be "YYYYMMDD" or in the format [now|today|yesterday][-N[day|week|month|year]]. E.g. "--date today-2weeks" downloads only videos uploaded on the same day two weeks ago
    --datebefore DATE  Download only videos uploaded on or before this date. The date formats accepted are the same as for --date
    --dateafter DATE  Download only videos uploaded on or after this date. The date formats accepted are the same as for --date
    --match-filters FILTER  Generic video filter. Any "OUTPUT TEMPLATE" field can be compared with a number or a string using the operators defined in "Filtering Formats". You can also simply specify a field to match if the field is present, use "!field" to check if the field is not present, and "&" to check multiple conditions. Use a "\" to escape "&" or quotes if needed. If used multiple times, the filter matches if at least one of the conditions is met. E.g. --match-filters !is_live --match-filters "like_count>?100 & description~='(?i)\bcats \& dogs\b'" matches only videos that are not live OR those that have a like count more than 100 (or the like field is not available) and also have a description that contains the phrase "cats & dogs" (caseless). Use "--match-filters -" to interactively ask whether to download each video
    --no-match-filters  Do not use any --match-filters (default)
    --break-match-filters FILTER  Same as "--match-filters" but stops the download process when a video is rejected
    --no-break-match-filters  Do not use any --break-match-filters (default)
    --no-playlist  Download only the video, if the URL refers to a video and a playlist
    --yes-playlist  Download the playlist, if the URL refers to a video and a playlist
    --age-limit YEARS  Download only videos suitable for the given age
    --download-archive FILE  Download only videos not listed in the archive file. Record the IDs of all downloaded videos in it
    --no-download-archive  Do not use archive file (default)
    --max-downloads NUMBER  Abort after downloading NUMBER files
    --break-on-existing  Stop the download process when encountering a file that is in the archive
    --no-break-on-existing  Do not stop the download process when encountering a file that is in the archive (default)
    --break-per-input  Alters --max-downloads, --break-on-existing, --break-match-filters, and autonumber to reset per input URL
    --no-break-per-input  --break-on-existing and similar options terminate the entire download queue
    --skip-playlist-after-errors N  Number of allowed failures until the rest of the playlist is skipped

## Download Options:
    -N, --concurrent-fragments N  Number of fragments of a dash/hlsnative video that should be downloaded concurrently (default is 1)
    -r, --limit-rate RATE  Maximum download rate in bytes per second, e.g. 50K or 4.2M
    --throttled-rate RATE  Minimum download rate in bytes per second below which throttling is assumed and the video data is re-extracted, e.g. 100K
    -R, --retries RETRIES  Number of retries (default is 10), or "infinite"
    --file-access-retries RETRIES  Number of times to retry on file access error (default is 3), or "infinite"
    --fragment-retries RETRIES  Number of retries for a fragment (default is 10), or "infinite" (DASH, hlsnative and ISM)
    --retry-sleep [TYPE:]EXPR  Time to sleep between retries in seconds (optionally) prefixed by the type of retry (http (default), fragment, file_access, extractor) to apply the sleep to. EXPR can be a number, linear=START[:END[:STEP=1]] or exp=START[:END[:BASE=2]]. This option can be used multiple times to set the sleep for the different retry types, e.g. --retry-sleep linear=1::2 --retry-sleep fragment:exp=1:20
    --skip-unavailable-fragments  Skip unavailable fragments for DASH, hlsnative and ISM downloads (default) (Alias: --no-abort-on-unavailable-fragments)
    --abort-on-unavailable-fragments  Abort download if a fragment is unavailable (Alias: --no-skip-unavailable-fragments)
    --keep-fragments  Keep downloaded fragments on disk after downloading is finished
    --no-keep-fragments  Delete downloaded fragments after downloading is finished (default)
    --buffer-size SIZE  Size of download buffer, e.g. 1024 or 16K (default is 1024)
    --resize-buffer  The buffer size is automatically resized from an initial value of --buffer-size (default)
    --no-resize-buffer  Do not automatically adjust the buffer size
    --http-chunk-size SIZE  Size of a chunk for chunk-based HTTP downloading, e.g. 10485760 or 10M (default is disabled). May be useful for bypassing bandwidth throttling imposed by a webserver (experimental)
    --playlist-random  Download playlist videos in random order
    --lazy-playlist  Process entries in the playlist as they are received. This disables n_entries, --playlist-random and --playlist-reverse
    --no-lazy-playlist  Process videos in the playlist only after the entire playlist is parsed (default)
    --xattr-set-filesize  Set file xattribute ytdl.filesize with expected file size
    --hls-use-mpegts  Use the mpegts container for HLS videos, allowing some players to play the video while downloading, and reducing the chance of file corruption if download is interrupted. This is enabled by default for live streams
    --no-hls-use-mpegts  Do not use the mpegts container for HLS videos. This is the default when not downloading live streams
    --download-sections REGEX  Download only chapters that match the regular expression. A "*" prefix denotes time-range instead of chapter. Negative timestamps are calculated from the end. "*from-url" can be used to download between the "start_time" and "end_time" extracted from the URL. Needs ffmpeg. This option can be used multiple times to download multiple sections, e.g. --download-sections "*10:15-inf" --download-sections "intro"
    --downloader [PROTO:]NAME  Name or path of the external downloader to use (optionally) prefixed by the protocols (http, ftp, m3u8, dash, rtsp, rtmp, mms) to use it for. Currently supports native, aria2c, avconv, axel, curl, ffmpeg, httpie, wget. You can use this option multiple times to set different downloaders for different protocols. E.g. --downloader aria2c --downloader "dash,m3u8:native" will use aria2c for http/ftp downloads, and the native downloader for dash/m3u8 downloads (Alias: --external-downloader)
    --downloader-args NAME:ARGS  Give these arguments to the external downloader. Specify the downloader name and the arguments separated by a colon ":". For ffmpeg, arguments can be passed to different positions using the same syntax as --postprocessor-args. You can use this option multiple times to give different arguments to different downloaders (Alias: --external-downloader-args)

## Filesystem Options:
    -a, --batch-file FILE  File containing URLs to download ("-" for stdin), one URL per line. Lines starting with "#", ";" or "]" are considered comments and ignored
    --no-batch-file  Do not read URLs from batch file (default)
    -P, --paths [TYPES:]PATH  The paths where the files should be downloaded. Specify the type of file and the path separated by a colon ":". All the same TYPES as --output are supported. Additionally, you can also provide "home" (default) and "temp" paths. All intermediary files are first downloaded to the temp path and then the final files are moved over to the home path after download is finished. This option is ignored if --output is an absolute path
    -o, --output [TYPES:]TEMPLATE  Output filename template; see "OUTPUT TEMPLATE" for details
    --output-na-placeholder TEXT  Placeholder for unavailable fields in --output (default: "NA")
    --restrict-filenames  Restrict filenames to only ASCII characters, and avoid "&" and spaces in filenames
    --no-restrict-filenames  Allow Unicode characters, "&" and spaces in filenames (default)
    --windows-filenames  Force filenames to be Windows-compatible
    --no-windows-filenames  Make filenames Windows-compatible only if using Windows (default)
    --trim-filenames LENGTH  Limit the filename length (excluding extension) to the specified number of characters
    -w, --no-overwrites  Do not overwrite any files
    --force-overwrites  Overwrite all video and metadata files. This option includes --no-continue
    --no-force-overwrites  Do not overwrite the video, but overwrite related files (default)
    -c, --continue  Resume partially downloaded files/fragments (default)
    --no-continue  Do not resume partially downloaded fragments. If the file is not fragmented, restart download of the entire file
    --part  Use .part files instead of writing directly into output file (default)
    --no-part  Do not use .part files - write directly into output file
    --mtime  Use the Last-modified header to set the file modification time (default)
    --no-mtime  Do not use the Last-modified header to set the file modification time
    --write-description  Write video description to a .description file
    --no-write-description  Do not write video description (default)
    --write-info-json  Write video metadata to a .info.json file (this may contain personal information)
    --no-write-info-json  Do not write video metadata (default)
    --write-playlist-metafiles  Write playlist metadata in addition to the video metadata when using --write-info-json, --write-description etc. (default)
    --no-write-playlist-metafiles  Do not write playlist metadata when using --write-info-json, --write-description etc.
    --clean-info-json  Remove some internal metadata such as filenames from the infojson (default)
    --no-clean-info-json  Write all fields to the infojson
    --write-comments  Retrieve video comments to be placed in the infojson. The comments are fetched even without this option if the extraction is known to be quick (Alias: --get-comments)
    --no-write-comments  Do not retrieve video comments unless the extraction is known to be quick (Alias: --no-get-comments)
    --load-info-json FILE  JSON file containing the video information (created with the "--write-info-json" option)
    --cookies FILE  Netscape formatted file to read cookies from and dump cookie jar in
    --no-cookies  Do not read/dump cookies from/to file (default)
    --cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]  The name of the browser to load cookies from. Currently supported browsers are: brave, chrome, chromium, edge, firefox, opera, safari, vivaldi, whale. Optionally, the KEYRING used for decrypting Chromium cookies on Linux, the name/path of the PROFILE to load cookies from, and the CONTAINER name (if Firefox) ("none" for no container) can be given with their respective separators. By default, all containers of the most recently accessed profile are used. Currently supported keyrings are: basictext, gnomekeyring, kwallet, kwallet5, kwallet6
    --no-cookies-from-browser  Do not load cookies from browser (default)
    --cache-dir DIR  Location in the filesystem where yt-dlp can store some downloaded information (such as client ids and signatures) permanently. By default ${XDG_CACHE_HOME}/yt-dlp
    --no-cache-dir  Disable filesystem caching
    --rm-cache-dir  Delete all filesystem cache files

## Thumbnail Options:
    --write-thumbnail  Write thumbnail image to disk
    --no-write-thumbnail  Do not write thumbnail image to disk (default)
    --write-all-thumbnails  Write all thumbnail image formats to disk
    --list-thumbnails  List available thumbnails of each video. Simulate unless --no-simulate is used

## Internet Shortcut Options:
    --write-link  Write an internet shortcut file, depending on the current platform (.url, .webloc or .desktop). The URL may be cached by the OS
    --write-url-link  Write a .url Windows internet shortcut. The OS caches the URL based on the file path
    --write-webloc-link  Write a .webloc macOS internet shortcut
    --write-desktop-link  Write a .desktop Linux internet shortcut

## Verbosity and Simulation Options:
    -q, --quiet  Activate quiet mode. If used with --verbose, print the log to stderr
    --no-quiet  Deactivate quiet mode. (Default)
    --no-warnings  Ignore warnings
    -s, --simulate  Do not download the video and do not write anything to disk
    --no-simulate  Download the video even if printing/listing options are used
    --ignore-no-formats-error  Ignore "No video formats" error. Useful for extracting metadata even if the videos are not actually available for download (experimental)
    --no-ignore-no-formats-error  Throw error when no downloadable video formats are found (default)
    --skip-download  Do not download the video but write all related files (Alias: --no-download)
    -O, --print [WHEN:]TEMPLATE  Field name or output template to print to screen, optionally prefixed with when to print it, separated by a ":". Supported values of "WHEN" are the same as that of --use-postprocessor (default: video). Implies --quiet. Implies --simulate unless --no-simulate or later stages of WHEN are used. This option can be used multiple times
    --print-to-file [WHEN:]TEMPLATE FILE  Append given template to the file. The values of WHEN and TEMPLATE are same as that of --print. FILE uses the same syntax as the output template. This option can be used multiple times
    -j, --dump-json  Quiet, but print JSON information for each video. Simulate unless --no-simulate is used. See "OUTPUT TEMPLATE" for a description of available keys
    -J, --dump-single-json  Quiet, but print JSON information for each url or infojson passed. Simulate unless --no-simulate is used. If the URL refers to a playlist, the whole playlist information is dumped in a single line
    --force-write-archive  Force download archive entries to be written as long as no errors occur, even if -s or another simulation option is used (Alias: --force-download-archive)
    --newline  Output progress bar as new lines
    --no-progress  Do not print progress bar
    --progress  Show progress bar, even if in quiet mode
    --console-title  Display progress in console titlebar
    --progress-template [TYPES:]TEMPLATE  Template for progress outputs, optionally prefixed with one of "download:" (default), "download-title:" (the console title), "postprocess:", or "postprocess-title:". The video's fields are accessible under the "info" key and the progress attributes are accessible under the "progress" key. E.g. --console-title --progress-template "download-title:%(info.id)s-%(progress.eta)s"
    --progress-delta SECONDS  Time between progress output (default: 0)
    -v, --verbose  Print various debugging information
    --dump-pages  Print downloaded pages encoded using base64 to debug problems (very verbose)
    --write-pages  Write downloaded intermediary pages to files in the current directory to debug problems
    --print-traffic  Display sent and read HTTP traffic

## Workarounds:
    --encoding ENCODING  Force the specified encoding (experimental)
    --legacy-server-connect  Explicitly allow HTTPS connection to servers that do not support RFC 5746 secure renegotiation
    --no-check-certificates  Suppress HTTPS certificate validation
    --prefer-insecure  Use an unencrypted connection to retrieve information about the video (Currently supported only for YouTube)
    --add-headers FIELD:VALUE  Specify a custom HTTP header and its value, separated by a colon ":". You can use this option multiple times
    --bidi-workaround  Work around terminals that lack bidirectional text support. Requires bidiv or fribidi executable in PATH
    --sleep-requests SECONDS  Number of seconds to sleep between requests during data extraction
    --sleep-interval SECONDS  Number of seconds to sleep before each download. This is the minimum time to sleep when used along with --max-sleep-interval (Alias: --min-sleep-interval)
    --max-sleep-interval SECONDS  Maximum number of seconds to sleep. Can only be used along with --min-sleep-interval
    --sleep-subtitles SECONDS  Number of seconds to sleep before each subtitle download

## Video Format Options:
    -f, --format FORMAT  Video format code, see "FORMAT SELECTION" for more details
    -S, --format-sort SORTORDER  Sort the formats by the fields given, see "Sorting Formats" for more details
    --format-sort-force  Force user specified sort order to have precedence over all fields, see "Sorting Formats" for more details (Alias: --S-force)
    --no-format-sort-force  Some fields have precedence over the user specified sort order (default)
    --video-multistreams  Allow multiple video streams to be merged into a single file
    --no-video-multistreams  Only one video stream is downloaded for each output file (default)
    --audio-multistreams  Allow multiple audio streams to be merged into a single file
    --no-audio-multistreams  Only one audio stream is downloaded for each output file (default)
    --prefer-free-formats  Prefer video formats with free containers over non-free ones of the same quality. Use with "-S ext" to strictly prefer free containers irrespective of quality
    --no-prefer-free-formats  Don't give any special preference to free containers (default)
    --check-formats  Make sure formats are selected only from those that are actually downloadable
    --check-all-formats  Check all formats for whether they are actually downloadable
    --no-check-formats  Do not check that the formats are actually downloadable
    -F, --list-formats  List available formats of each video. Simulate unless --no-simulate is used
    --merge-output-format FORMAT  Containers that may be used when merging formats, separated by "/", e.g. "mp4/mkv". Ignored if no merge is required. (currently supported: avi, flv, mkv, mov, mp4, webm)

## Subtitle Options:
    --write-subs  Write subtitle file
    --no-write-subs  Do not write subtitle file (default)
    --write-auto-subs  Write automatically generated subtitle file (Alias: --write-automatic-subs)
    --no-write-auto-subs  Do not write auto-generated subtitles (default) (Alias: --no-write-automatic-subs)
    --list-subs  List available subtitles of each video. Simulate unless --no-simulate is used
    --sub-format FORMAT  Subtitle format; accepts a formats preference, e.g. "srt" or "ass/srt/best"
    --sub-langs LANGS  Languages of the subtitles to download (can be regex) or "all" separated by commas, e.g. --sub-langs "en.*,ja". You can prefix the language code with a "-" to exclude it from the requested languages, e.g. --sub-langs all,-live_chat. Use --list-subs for a list of available language tags

## Authentication Options:
    -u, --username USERNAME  Login with this account ID
    -p, --password PASSWORD  Account password. If this option is left out, yt-dlp will ask interactively
    -2, --twofactor TWOFACTOR  Two-factor authentication code
    -n, --netrc  Use .netrc authentication data
    --netrc-location PATH  Location of .netrc authentication data; either the path or its containing directory. Defaults to ~/.netrc
    --netrc-cmd NETRC_CMD  Command to execute to get the credentials for an extractor
    --video-password PASSWORD  Video-specific password
    --ap-mso MSO  Adobe Pass multiple-system operator (TV provider) identifier, use --ap-list-mso for a list of available MSOs
    --ap-username USERNAME  Multiple-system operator account login
    --ap-password PASSWORD  Multiple-system operator account password. If this option is left out, yt-dlp will ask interactively
    --ap-list-mso  List all supported multiple-system operators
    --client-certificate CERTFILE  Path to client certificate file in PEM format. May include the private key
    --client-certificate-key KEYFILE  Path to private key file for client certificate
    --client-certificate-password PASSWORD  Password for client certificate private key, if encrypted. If not provided, and the key is encrypted, yt-dlp will ask interactively

## Post-Processing Options:
    -x, --extract-audio  Convert video files to audio-only files (requires ffmpeg and ffprobe)
    --audio-format FORMAT  Format to convert the audio to when -x is used. (currently supported: best (default), aac, alac, flac, m4a, mp3, opus, vorbis, wav). You can specify multiple rules using similar syntax as --remux-video
    --audio-quality QUALITY  Specify ffmpeg audio quality to use when converting the audio with -x. Insert a value between 0 (best) and 10 (worst) for VBR or a specific bitrate like 128K (default 5)
    --remux-video FORMAT  Remux the video into another container if necessary (currently supported: avi, flv, gif, mkv, mov, mp4, webm, aac, aiff, alac, flac, m4a, mka, mp3, ogg, opus, vorbis, wav). If target container does not support the video/audio codec, remuxing will fail. You can specify multiple rules; e.g. "aac>m4a/mov>mp4/mkv" will remux aac to m4a, mov to mp4 and anything else to mkv
    --recode-video FORMAT  Re-encode the video into another format if necessary. The syntax and supported formats are the same as --remux-video
    --postprocessor-args NAME:ARGS  Give these arguments to the postprocessors. Specify the postprocessor/executable name and the arguments separated by a colon ":" to give the argument to the specified postprocessor/executable. Supported PP are: Merger, ModifyChapters, SplitChapters, ExtractAudio, VideoRemuxer, VideoConvertor, Metadata, EmbedSubtitle, EmbedThumbnail, SubtitlesConvertor, ThumbnailsConvertor, FixupStretched, FixupM4a, FixupM3u8, FixupTimestamp and FixupDuration. The supported executables are: AtomicParsley, FFmpeg and FFprobe. You can also specify "PP+EXE:ARGS" to give the arguments to the specified executable only when being used by the specified postprocessor. Additionally, for ffmpeg/ffprobe, "_i"/"_o" can be appended to the prefix optionally followed by a number to pass the argument before the specified input/output file, e.g. --ppa "Merger+ffmpeg_i1:-v quiet". You can use this option multiple times to give different arguments to different postprocessors. (Alias: --ppa)
    -k, --keep-video  Keep the intermediate video file on disk after post-processing
    --no-keep-video  Delete the intermediate video file after post-processing (default)
    --post-overwrites  Overwrite post-processed files (default)
    --no-post-overwrites  Do not overwrite post-processed files
    --embed-subs  Embed subtitles in the video (only for mp4, webm and mkv videos)
    --no-embed-subs  Do not embed subtitles (default)
    --embed-thumbnail  Embed thumbnail in the video as cover art
    --no-embed-thumbnail  Do not embed thumbnail (default)
    --embed-metadata  Embed metadata to the video file. Also embeds chapters/infojson if present unless --no-embed-chapters/--no-embed-info-json are used (Alias: --add-metadata)
    --no-embed-metadata  Do not add metadata to file (default) (Alias: --no-add-metadata)
    --embed-chapters  Add chapter markers to the video file (Alias: --add-chapters)
    --no-embed-chapters  Do not add chapter markers (default) (Alias: --no-add-chapters)
    --embed-info-json  Embed the infojson as an attachment to mkv/mka video files
    --no-embed-info-json  Do not embed the infojson as an attachment to the video file
    --parse-metadata [WHEN:]FROM:TO  Parse additional metadata like title/artist from other fields; see "MODIFYING METADATA" for details. Supported values of "WHEN" are the same as that of --use-postprocessor (default: pre_process)
    --replace-in-metadata [WHEN:]FIELDS REGEX REPLACE  Replace text in a metadata field using the given regex. This option can be used multiple times. Supported values of "WHEN" are the same as that of --use-postprocessor (default: pre_process)
    --xattrs  Write metadata to the video file's xattrs (using dublin core and xdg standards)
    --concat-playlist POLICY  Concatenate videos in a playlist. One of "never", "always", or "multi_video" (default; only when the videos form a single show). All the video files must have the same codecs and number of streams to be concatenable. The "pl_video:" prefix can be used with "--paths" and "--output" to set the output filename for the concatenated files. See "OUTPUT TEMPLATE" for details
    --fixup POLICY  Automatically correct known faults of the file. One of never (do nothing), warn (only emit a warning), detect_or_warn (the default; fix file if we can, warn otherwise), force (try fixing even if file already exists)
    --ffmpeg-location PATH  Location of the ffmpeg binary; either the path to the binary or its containing directory
    --exec [WHEN:]CMD  Execute a command, optionally prefixed with when to execute it, separated by a ":". Supported values of "WHEN" are the same as that of --use-postprocessor (default: after_move). Same syntax as the output template can be used to pass any field as arguments to the command. If no fields are passed, %(filepath,_filename|)q is appended to the end of the command. This option can be used multiple times
    --no-exec  Remove any previously defined --exec
    --convert-subs FORMAT  Convert the subtitles to another format (currently supported: ass, lrc, srt, vtt). Use "--convert-subs none" to disable conversion (default) (Alias: --convert-subtitles)
    --convert-thumbnails FORMAT  Convert the thumbnails to another format (currently supported: jpg, png, webp). You can specify multiple rules using similar syntax as "--remux-video". Use "--convert-thumbnails none" to disable conversion (default)
    --split-chapters  Split video into multiple files based on internal chapters. The "chapter:" prefix can be used with "--paths" and "--output" to set the output filename for the split files. See "OUTPUT TEMPLATE" for details
    --no-split-chapters  Do not split video based on chapters (default)
    --remove-chapters REGEX  Remove chapters whose title matches the given regular expression. The syntax is the same as --download-sections. This option can be used multiple times
    --no-remove-chapters  Do not remove any chapters from the file (default)
    --force-keyframes-at-cuts  Force keyframes at cuts when downloading/splitting/removing sections. This is slow due to needing a re-encode, but the resulting video may have fewer artifacts around the cuts
    --no-force-keyframes-at-cuts  Do not force keyframes around the chapters when cutting/splitting (default)
    --use-postprocessor NAME[:ARGS]  The (case sensitive) name of plugin postprocessors to be enabled, and (optionally) arguments to be passed to it, separated by a colon ":". ARGS are a semicolon ";" delimited list of NAME=VALUE. The "when" argument determines when the postprocessor is invoked. It can be one of "pre_process" (after video extraction), "after_filter" (after video passes filter), "video" (after --format; before --print/--output), "before_dl" (before each video download), "post_process" (after each video download; default), "after_move" (after moving video file to its final locations), "after_video" (after downloading and processing all formats of a video), or "playlist" (at end of playlist). This option can be used multiple times to add different postprocessors

## SponsorBlock Options:
Make chapter entries for, or remove various segments (sponsor, introductions, etc.) from downloaded YouTube videos using the [SponsorBlock API](https://sponsor.ajay.app)

    --sponsorblock-mark CATS  SponsorBlock categories to create chapters for, separated by commas. Available categories are sponsor, intro, outro, selfpromo, preview, filler, interaction, music_offtopic, poi_highlight, chapter, all and default (=all). You can prefix the category with a "-" to exclude it. See [1] for description of the categories. E.g. --sponsorblock-mark all,-preview [1] https://wiki.sponsor.ajay.app/w/Segment_Categories
    --sponsorblock-remove CATS  SponsorBlock categories to be removed from the video file, separated by commas. If a category is present in both mark and remove, remove takes precedence. The syntax and available categories are the same as for --sponsorblock-mark except that "default" refers to "all,-filler" and poi_highlight and chapter are not available
    --sponsorblock-chapter-title TEMPLATE  An output template for the title of the SponsorBlock chapters created by --sponsorblock-mark. The only available fields are start_time, end_time, category, categories, name, category_names. Defaults to "[SponsorBlock]: %(category_names)l"
    --no-sponsorblock  Disable both --sponsorblock-mark and --sponsorblock-remove
    --sponsorblock-api URL  SponsorBlock API location, defaults to https://sponsor.ajay.app

## Extractor Options:
    --extractor-retries RETRIES  Number of retries for known extractor errors (default is 3), or "infinite"
    --allow-dynamic-mpd  Process dynamic DASH manifests (default) (Alias: --no-ignore-dynamic-mpd)
    --ignore-dynamic-mpd  Do not process dynamic DASH manifests (Alias: --no-allow-dynamic-mpd)
    --hls-split-discontinuity  Split HLS playlists to different formats at discontinuities such as ad breaks
    --no-hls-split-discontinuity  Do not split HLS playlists to different formats at discontinuities such as ad breaks (default)
    --extractor-args IE_KEY:ARGS  Pass ARGS arguments to the IE_KEY extractor. See "EXTRACTOR ARGUMENTS" for details. You can use this option multiple times to give arguments for different extractors

# CONFIGURATION

You can configure yt-dlp by placing any supported command line option in a configuration file. The configuration is loaded from the following locations:

1. **Main Configuration**:
    * The file given to `--config-location`
1. **Portable Configuration**: (Recommended for portable installations)
    * If using a binary, `yt-dlp.conf` in the same directory as the binary
    * If running from source-code, `yt-dlp.conf` in the parent directory of `yt_dlp`
1. **Home Configuration**:
    * `yt-dlp.conf` in the home path given to `-P`
    * If `-P` is not given, the current directory is searched
1. **User Configuration**:
    * `${XDG_CONFIG_HOME}/yt-dlp.conf`
    * `${XDG_CONFIG_HOME}/yt-dlp/config` (recommended on Linux/macOS)
    * `${XDG_CONFIG_HOME}/yt-dlp/config.txt`
    * `${APPDATA}/yt-dlp.conf`
    * `${APPDATA}/yt-dlp/config` (recommended on Windows)
    * `${APPDATA}/yt-dlp/config.txt`
    * `~/yt-dlp.conf`
    * `~/yt-dlp.conf.txt`
    * `~/.yt-dlp/config`
    * `~/.yt-dlp/config.txt`

    See also: [Notes about environment variables](#notes-about-environment-variables)
1. **System Configuration**:
    * `/etc/yt-dlp.conf`
    * `/etc/yt-dlp/config`
    * `/etc/yt-dlp/config.txt`

E.g. with the following configuration file, yt-dlp will always extract the audio, not copy the mtime, use a proxy and save all videos under a `YouTube` directory in your home directory:
```
# Lines starting with # are comments

# Always extract audio
-x

# Do not copy the mtime
--no-mtime

# Use this proxy
--proxy 127.0.0.1:3128

# Save all videos under YouTube directory in your home directory
-o ~/YouTube/%(title)s.%(ext)s
```

**Note**: Options in a configuration file are just the same options, a.k.a. switches, used in regular command line calls; thus there **must be no whitespace** after `-` or `--`, e.g. `-o` or `--proxy` but not `- o` or `-- proxy`. They must also be quoted when necessary, as if it were a UNIX shell.

You can use `--ignore-config` if you want to disable all configuration files for a particular yt-dlp run. If `--ignore-config` is found inside any configuration file, no further configuration will be loaded. For example, having the option in the portable configuration file prevents loading of home, user, and system configurations. Additionally, (for backward compatibility) if `--ignore-config` is found inside the system configuration file, the user configuration is not loaded.

### Configuration file encoding

The configuration files are decoded according to the UTF BOM if present, and in the encoding from system locale otherwise.

If you want your file to be decoded differently, add `# coding: ENCODING` to the beginning of the file (e.g. `# coding: shift-jis`). There must be no characters before that, even spaces or BOM.

### Authentication with netrc

You may also want to configure automatic credentials storage for extractors that support authentication (by providing login and password with `--username` and `--password`) in order not to pass credentials as command line arguments on every yt-dlp execution and prevent tracking plain text passwords in the shell command history. You can achieve this using a [`.netrc` file](https://stackoverflow.com/tags/.netrc/info) on a per-extractor basis. For that, you will need to create a `.netrc` file in `--netrc-location` and restrict permissions to read/write by only you:
```
touch ${HOME}/.netrc
chmod a-rwx,u+rw ${HOME}/.netrc
```
After that, you can add credentials for an extractor in the following format, where *extractor* is the name of the extractor in lowercase:
```
machine <extractor> login <username> password <password>
```
E.g.
```
machine youtube login myaccount@gmail.com password my_youtube_password
machine twitch login my_twitch_account_name password my_twitch_password
```
To activate authentication with the `.netrc` file you should pass `--netrc` to yt-dlp or place it in the [configuration file](#configuration). The default location of the .netrc file is `~` (see below).

As an alternative to using the `.netrc` file, which has the disadvantage of keeping your passwords in a plain text file, you can configure a custom shell command to provide the credentials for an extractor. This is done by providing the `--netrc-cmd` parameter; the command shall output the credentials in the netrc format and return `0` on success, while other values will be treated as an error. `{}` in the command will be replaced by the name of the extractor to make it possible to select the credentials for the right extractor.

E.g. to use an encrypted `.netrc` file stored as `.authinfo.gpg`
```
yt-dlp --netrc-cmd 'gpg --decrypt ~/.authinfo.gpg' https://www.youtube.com/watch?v=BaW_jenozKc
```

### Notes about environment variables
* Environment variables are normally specified as `${VARIABLE}`/`$VARIABLE` on UNIX and `%VARIABLE%` on Windows; but are always shown as `${VARIABLE}` in this documentation
* yt-dlp also allows using UNIX-style variables on Windows for path-like options; e.g. `--output`, `--config-location`
* If unset, `${XDG_CONFIG_HOME}` defaults to `~/.config` and `${XDG_CACHE_HOME}` to `~/.cache`
* On Windows, `~` points to `${HOME}` if present; or, `${USERPROFILE}` or `${HOMEDRIVE}${HOMEPATH}` otherwise
* On Windows, `${USERPROFILE}` generally points to `C:\Users\<user name>` and `${APPDATA}` to `${USERPROFILE}\AppData\Roaming`

# OUTPUT TEMPLATE

The `-o` option is used to indicate a template for the output file names while `-P` option is used to specify the path each type of file should be saved to.

<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
**tl;dr:** [navigate me to examples](#output-template-examples).
<!-- MANPAGE: END EXCLUDED SECTION -->

The simplest usage of `-o` is not to set any template arguments when downloading a single file, like in `yt-dlp -o funny_video.flv "https://some/video"` (hard-coding the file extension like this is _not_ recommended and could break some post-processing).

It may however also contain special sequences that will be replaced when downloading each video. The special sequences may be formatted according to [Python string formatting operations](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting), e.g. `%(NAME)s` or `%(NAME)05d`. To clarify, that is a percent symbol followed by a name in parentheses, followed by formatting operations.

The field names themselves (the part inside the parenthesis) can also have some special formatting:

1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a dot `.` separator; e.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`. You can do Python slicing with colon `:`; E.g. `%(id.3:7)s`, `%(id.6:2:-1)s`, `%(formats.:.format_id)s`. Curly braces `{}` can be used to build dictionaries with only specific keys; e.g. `%(formats.:.{format_id,height})#j`. An empty field name `%()s` refers to the entire infodict; e.g. `%(.{id,title})s`. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields

1. **Arithmetic**: Simple arithmetic can be done on numeric fields using `+`, `-` and `*`. E.g. `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
1. **Date/time Formatting**: Date/time fields can be formatted according to [strftime formatting](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes) by specifying it separated from the field name using a `>`. E.g. `%(duration>%H-%M-%S)s`, `%(upload_date>%Y-%m-%d)s`, `%(epoch-3600>%H-%M-%S)s`

1. **Alternatives**: Alternate fields can be specified separated with a `,`. E.g. `%(release_date>%Y,upload_date>%Y|Unknown)s`

1. **Replacement**: A replacement value can be specified using a `&` separator according to the [`str.format` mini-language](https://docs.python.org/3/library/string.html#format-specification-mini-language). If the field is *not* empty, this replacement value will be used instead of the actual field content. This is done after alternate fields are considered; thus the replacement is used if *any* of the alternative fields is *not* empty. E.g. `%(chapters&has chapters|no chapters)s`, `%(title&TITLE={:>20}|NO TITLE)s`

1. **Default**: A literal default value can be specified for when the field is empty, using a `|` separator. This overrides `--output-na-placeholder`. E.g. `%(uploader|Unknown)s`

1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, yt-dlp additionally supports converting to `B` = **B**ytes, `j` = **j**son (flag `#` for pretty-printing, `+` for Unicode), `h` = HTML escaping, `l` = a comma separated **l**ist (flag `#` for `\n` newline-separated), `q` = a string **q**uoted for the terminal (flag `#` to split a list into different arguments), `D` = add **D**ecimal suffixes (e.g. 10M) (flag `#` to use 1024 as factor), and `S` = **S**anitize as filename (flag `#` for restricted)

1. **Unicode normalization**: The format type `U` can be used for NFC [Unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. E.g. `%(title)+.100U` is NFKC

To summarize, the general syntax for a field is:
```
%(name[.keys][addition][>strf][,alternate][&replacement][|default])[flags][width][.precision][length]type
```

Additionally, you can set different output templates for the various metadata files separately from the general output template by specifying the type of file followed by the template separated by a colon `:`. The different file types supported are `subtitle`, `thumbnail`, `description`, `annotation` (deprecated), `infojson`, `link`, `pl_thumbnail`, `pl_description`, `pl_infojson`, `chapter`, `pl_video`. E.g. `-o "%(title)s.%(ext)s" -o "thumbnail:%(title)s\%(title)s.%(ext)s"` will put the thumbnails in a folder with the same name as the video. If any of the templates is empty, that type of file will not be written. E.g. `--write-thumbnail -o "thumbnail:"` will write thumbnails only for playlists and not for videos.

<a id="outtmpl-postprocess-note"></a>

**Note**: Due to post-processing (i.e. merging etc.), the actual output filename might differ. Use `--print after_move:filepath` to get the name after all post-processing is complete.
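For instance, a minimal sketch of using that note in practice (the URL is illustrative): download a video, then print the actual path of the final file once all post-processing is done:

```
# Download, then print the post-processing-aware output path
yt-dlp -o "%(title)s [%(id)s].%(ext)s" --print after_move:filepath "https://some/video"
```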
The available fields are:

- `id` (string): Video identifier
- `title` (string): Video title
- `fulltitle` (string): Video title ignoring live timestamp and generic title
- `ext` (string): Video filename extension
- `alt_title` (string): A secondary title of the video
- `description` (string): The description of the video
- `display_id` (string): An alternative identifier for the video
- `uploader` (string): Full name of the video uploader
- `uploader_id` (string): Nickname or id of the video uploader
- `uploader_url` (string): URL to the video uploader's profile
- `license` (string): License name the video is licensed under
- `creators` (list): The creators of the video
- `creator` (string): The creators of the video; comma-separated
- `timestamp` (numeric): UNIX timestamp of the moment the video became available
- `upload_date` (string): Video upload date in UTC (YYYYMMDD)
- `release_timestamp` (numeric): UNIX timestamp of the moment the video was released
- `release_date` (string): The date (YYYYMMDD) when the video was released in UTC
- `release_year` (numeric): Year (YYYY) when the video or album was released
- `modified_timestamp` (numeric): UNIX timestamp of the moment the video was last modified
- `modified_date` (string): The date (YYYYMMDD) when the video was last modified in UTC
- `channel` (string): Full name of the channel the video is uploaded on
- `channel_id` (string): Id of the channel
- `channel_url` (string): URL of the channel
- `channel_follower_count` (numeric): Number of followers of the channel
- `channel_is_verified` (boolean): Whether the channel is verified on the platform
- `location` (string): Physical location where the video was filmed
- `duration` (numeric): Length of the video in seconds
- `duration_string` (string): Length of the video (HH:mm:ss)
- `view_count` (numeric): How many users have watched the video on the platform
- `concurrent_view_count` (numeric): How many users are currently watching the video on the platform
- `like_count` (numeric): Number of positive ratings of the video
- `dislike_count` (numeric): Number of negative ratings of the video
- `repost_count` (numeric): Number of reposts of the video
- `average_rating` (numeric): Average rating given by users; the scale used depends on the webpage
- `comment_count` (numeric): Number of comments on the video (For some extractors, comments are only downloaded at the end, and so this field cannot be used)
- `age_limit` (numeric): Age restriction for the video (years)
- `live_status` (string): One of "not_live", "is_live", "is_upcoming", "was_live", "post_live" (was live, but VOD is not yet processed)
- `is_live` (boolean): Whether this video is a live stream or a fixed-length video
- `was_live` (boolean): Whether this video was originally a live stream
- `playable_in_embed` (string): Whether this video is allowed to play in embedded players on other sites
- `availability` (string): Whether the video is "private", "premium_only", "subscriber_only", "needs_auth", "unlisted" or "public"
- `media_type` (string): The type of media as classified by the site, e.g. "episode", "clip", "trailer"
"episode", "clip", "trailer" - `start_time` (numeric): Time in seconds where the reproduction should start, as specified in the URL - `end_time` (numeric): Time in seconds where the reproduction should end, as specified in the URL - `extractor` (string): Name of the extractor - `extractor_key` (string): Key name of the extractor - `epoch` (numeric): Unix epoch of when the information extraction was completed - `autonumber` (numeric): Number that will be increased with each download, starting at `--autonumber-start`, padded with leading zeros to 5 digits - `video_autonumber` (numeric): Number that will be increased with each video - `n_entries` (numeric): Total number of extracted items in the playlist - `playlist_id` (string): Identifier of the playlist that contains the video - `playlist_title` (string): Name of the playlist that contains the video - `playlist` (string): `playlist_title` if available or else `playlist_id` - `playlist_count` (numeric): Total number of items in the playlist. May not be known if entire playlist is not extracted - `playlist_index` (numeric): Index of the video in the playlist padded with leading zeros according the final index - `playlist_autonumber` (numeric): Position of the video in the playlist download queue padded with leading zeros according to the total length of the playlist - `playlist_uploader` (string): Full name of the playlist uploader - `playlist_uploader_id` (string): Nickname or id of the playlist uploader - `playlist_channel` (string): Display name of the channel that uploaded the playlist - `playlist_channel_id` (string): Identifier of the channel that uploaded the playlist - `webpage_url` (string): A URL to the video webpage which, if given to yt-dlp, should yield the same result again - `webpage_url_basename` (string): The basename of the webpage URL - `webpage_url_domain` (string): The domain of the webpage URL - `original_url` (string): The URL given by the user (or same as `webpage_url` for playlist entries) - `categories` (list): List of categories the video belongs to - `tags` (list): List of tags assigned to the video - `cast` (list): List of cast members All the fields in [Filtering Formats](#filtering-formats) can also be used Available for the video that belongs to some logical chapter or section: - `chapter` (string): Name or title of the chapter the video belongs to - `chapter_number` (numeric): Number of the chapter the video belongs to - `chapter_id` (string): Id of the chapter the video belongs to Available for the video that is an episode of some series or program: - `series` (string): Title of the series or program the video episode belongs to - `series_id` (string): Id of the series or program the video episode belongs to - `season` (string): Title of the season the video episode belongs to - `season_number` (numeric): Number of the season the video episode belongs to - `season_id` (string): Id of the season the video episode belongs to - `episode` (string): Title of the video episode - `episode_number` (numeric): Number of the video episode within a season - `episode_id` (string): Id of the video episode Available for the media that is a track or a part of a music album: - `track` (string): Title of the track - `track_number` (numeric): Number of the track within an album or a disc - `track_id` (string): Id of the track - `artists` (list): Artist(s) of the track - `artist` (string): Artist(s) of the track; comma-separated - `genres` (list): Genre(s) of the track - `genre` (string): Genre(s) of the track; comma-separated 
 - `composers` (list): Composer(s) of the piece
 - `composer` (string): Composer(s) of the piece; comma-separated
 - `album` (string): Title of the album the track belongs to
 - `album_type` (string): Type of the album
 - `album_artists` (list): All artists who appear on the album
 - `album_artist` (string): All artists who appear on the album; comma-separated
 - `disc_number` (numeric): Number of the disc or other physical medium the track belongs to

Available only when using `--download-sections` and for `chapter:` prefix when using `--split-chapters` for videos with internal chapters:

 - `section_title` (string): Title of the chapter
 - `section_number` (numeric): Number of the chapter within the file
 - `section_start` (numeric): Start time of the chapter in seconds
 - `section_end` (numeric): End time of the chapter in seconds

Available only when used in `--print`:

 - `urls` (string): The URLs of all requested formats, one in each line
 - `filename` (string): Name of the video file. Note that the [actual filename may differ](#outtmpl-postprocess-note)
 - `formats_table` (table): The video format table as printed by `--list-formats`
 - `thumbnails_table` (table): The thumbnail format table as printed by `--list-thumbnails`
 - `subtitles_table` (table): The subtitle format table as printed by `--list-subs`
 - `automatic_captions_table` (table): The automatic subtitle format table as printed by `--list-subs`

Available only after the video is downloaded (`post_process`/`after_move`):

 - `filepath`: Actual path of the downloaded video file

Available only in `--sponsorblock-chapter-title`:

 - `start_time` (numeric): Start time of the chapter in seconds
 - `end_time` (numeric): End time of the chapter in seconds
 - `categories` (list): The [SponsorBlock categories](https://wiki.sponsor.ajay.app/w/Types#Category) the chapter belongs to
 - `category` (string): The smallest SponsorBlock category the chapter belongs to
 - `category_names` (list): Friendly names of the categories
 - `name` (string): Friendly name of the smallest category
 - `type` (string): The [SponsorBlock action type](https://wiki.sponsor.ajay.app/w/Types#Action_Type) of the chapter

Each aforementioned sequence, when referenced in an output template, will be replaced by the actual value corresponding to the sequence name. E.g. for `-o %(title)s-%(id)s.%(ext)s` and an mp4 video with title `yt-dlp test video` and id `BaW_jenozKc`, this will result in a `yt-dlp test video-BaW_jenozKc.mp4` file created in the current directory.

**Note**: Some of the sequences are not guaranteed to be present, since they depend on the metadata obtained by a particular extractor. Such sequences will be replaced with the placeholder value provided with `--output-na-placeholder` (`NA` by default).

**Tip**: Look at the `-j` output to identify which fields are available for the particular URL

For numeric sequences, you can use [numeric related formatting](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting); e.g. `%(view_count)05d` will result in a string with view count padded with zeros up to 5 characters, like in `00042`.

Output templates can also contain arbitrary hierarchical paths, e.g. `-o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s"`, which will result in downloading each video in a directory corresponding to this path template. Any missing directory will be automatically created for you.

To use percent literals in an output template use `%%`. To output to stdout use `-o -`.

The current default template is `%(title)s [%(id)s].%(ext)s`.
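Tying the above together, here is a quick, hedged illustration using the example video ID that appears throughout this README (actual output will vary):

```bash
# Show what the default template produces for this video
# (--print implies --simulate, so nothing is downloaded)
$ yt-dlp --print filename BaW_jenozKc

# Numeric formatting: the view count zero-padded to 5 digits
$ yt-dlp --print "%(view_count)05d" BaW_jenozKc
```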
In some cases, you don't want special characters such as 中, spaces, or &, such as when transferring the downloaded filename to a Windows system or passing the filename through an 8bit-unsafe channel. In these cases, add the `--restrict-filenames` flag to get a shorter title.

#### Output template examples

```bash
$ yt-dlp --print filename -o "test video.%(ext)s" BaW_jenozKc
test video.webm    # Literal name with correct extension

$ yt-dlp --print filename -o "%(title)s.%(ext)s" BaW_jenozKc
youtube-dl test video ''_ä↭𝕐.webm    # All kinds of weird characters

$ yt-dlp --print filename -o "%(title)s.%(ext)s" BaW_jenozKc --restrict-filenames
youtube-dl_test_video_.webm    # Restricted file name

# Download YouTube playlist videos in separate directory indexed by video order in a playlist
$ yt-dlp -o "%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"

# Download YouTube playlist videos in separate directories according to their uploaded year
$ yt-dlp -o "%(upload_date>%Y)s/%(title)s.%(ext)s" "https://www.youtube.com/playlist?list=PLwiyx1dc3P2JR9N8gQaQN_BCvlSlap7re"

# Prefix playlist index with " - " separator, but only if it is available
$ yt-dlp -o "%(playlist_index&{} - |)s%(title)s.%(ext)s" BaW_jenozKc "https://www.youtube.com/user/TheLinuxFoundation/playlists"

# Download all playlists of YouTube channel/user keeping each playlist in separate directory:
$ yt-dlp -o "%(uploader)s/%(playlist)s/%(playlist_index)s - %(title)s.%(ext)s" "https://www.youtube.com/user/TheLinuxFoundation/playlists"

# Download Udemy course keeping each chapter in separate directory under MyVideos directory in your home
$ yt-dlp -u user -p password -P "~/MyVideos" -o "%(playlist)s/%(chapter_number)s - %(chapter)s/%(title)s.%(ext)s" "https://www.udemy.com/java-tutorial"

# Download entire series season keeping each series and each season in separate directory under C:/MyVideos
$ yt-dlp -P "C:/MyVideos" -o "%(series)s/%(season_number)s - %(season)s/%(episode_number)s - %(episode)s.%(ext)s" "https://videomore.ru/kino_v_detalayah/5_sezon/367617"

# Download video as "C:\MyVideos\uploader\title.ext", subtitles as "C:\MyVideos\subs\uploader\title.ext"
# and put all temporary files in "C:\MyVideos\tmp"
$ yt-dlp -P "C:/MyVideos" -P "temp:tmp" -P "subtitle:subs" -o "%(uploader)s/%(title)s.%(ext)s" BaW_jenozKc --write-subs

# Download video as "C:\MyVideos\uploader\title.ext" and subtitles as "C:\MyVideos\uploader\subs\title.ext"
$ yt-dlp -P "C:/MyVideos" -o "%(uploader)s/%(title)s.%(ext)s" -o "subtitle:%(uploader)s/subs/%(title)s.%(ext)s" BaW_jenozKc --write-subs

# Stream the video being downloaded to stdout
$ yt-dlp -o - BaW_jenozKc
```

# FORMAT SELECTION

By default, yt-dlp tries to download the best available quality if you **don't** pass any options. This is generally equivalent to using `-f bestvideo*+bestaudio/best`. However, if multiple audio streams are enabled (`--audio-multistreams`), the default format changes to `-f bestvideo+bestaudio/best`. Similarly, if ffmpeg is unavailable, or if you use yt-dlp to stream to `stdout` (`-o -`), the default becomes `-f best/bestvideo+bestaudio`.

**Deprecation warning**: Latest versions of yt-dlp can stream multiple formats to the stdout simultaneously using ffmpeg. So, in future versions, the default for this will be set to `-f bv*+ba/b` similar to normal downloads. If you want to preserve the `-f b/bv+ba` setting, it is recommended to explicitly specify it in the configuration options.
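For instance, if you rely on the old stdout-streaming default, you could pin it explicitly in a configuration file (a minimal sketch; see the CONFIGURATION section for the supported file locations):

```
# Explicitly keep the current stdout-streaming default format selection
-f "b/bv+ba"
```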
The general syntax for format selection is `-f FORMAT` (or `--format FORMAT`) where `FORMAT` is a *selector expression*, i.e. an expression that describes the format or formats you would like to download.

<!-- MANPAGE: BEGIN EXCLUDED SECTION -->
**tl;dr:** [navigate me to examples](#format-selection-examples).
<!-- MANPAGE: END EXCLUDED SECTION -->

The simplest case is requesting a specific format; e.g. with `-f 22` you can download the format with format code equal to 22. You can get the list of available format codes for a particular video using `--list-formats` or `-F`. Note that these format codes are extractor specific.

You can also use a file extension (currently `3gp`, `aac`, `flv`, `m4a`, `mp3`, `mp4`, `ogg`, `wav`, `webm` are supported) to download the best quality format of a particular file extension served as a single file, e.g. `-f webm` will download the best quality format with the `webm` extension served as a single file.

You can use `-f -` to interactively provide the format selector *for each video*.

You can also use special names to select particular edge case formats:

 - `all`: Select **all formats** separately
 - `mergeall`: Select and **merge all formats** (Must be used with `--audio-multistreams`, `--video-multistreams` or both)
 - `b*`, `best*`: Select the best quality format that **contains either** a video or an audio or both (i.e. `vcodec!=none or acodec!=none`)
 - `b`, `best`: Select the best quality format that **contains both** video and audio. Equivalent to `best*[vcodec!=none][acodec!=none]`
 - `bv`, `bestvideo`: Select the best quality **video-only** format. Equivalent to `best*[acodec=none]`
 - `bv*`, `bestvideo*`: Select the best quality format that **contains video**. It may also contain audio. Equivalent to `best*[vcodec!=none]`
 - `ba`, `bestaudio`: Select the best quality **audio-only** format. Equivalent to `best*[vcodec=none]`
 - `ba*`, `bestaudio*`: Select the best quality format that **contains audio**. It may also contain video. Equivalent to `best*[acodec!=none]` ([Do not use!](https://github.com/yt-dlp/yt-dlp/issues/979#issuecomment-919629354))
 - `w*`, `worst*`: Select the worst quality format that contains either a video or an audio
 - `w`, `worst`: Select the worst quality format that contains both video and audio. Equivalent to `worst*[vcodec!=none][acodec!=none]`
 - `wv`, `worstvideo`: Select the worst quality video-only format. Equivalent to `worst*[acodec=none]`
 - `wv*`, `worstvideo*`: Select the worst quality format that contains video. It may also contain audio. Equivalent to `worst*[vcodec!=none]`
 - `wa`, `worstaudio`: Select the worst quality audio-only format. Equivalent to `worst*[vcodec=none]`
 - `wa*`, `worstaudio*`: Select the worst quality format that contains audio. It may also contain video. Equivalent to `worst*[acodec!=none]`

For example, to download the worst quality video-only format you can use `-f worstvideo`. It is, however, recommended not to use `worst` and related options. When your format selector is `worst`, the format which is worst in all respects is selected. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-S +size` or, more rigorously, `-S +size,+br,+res,+fps` instead of `-f worst`. See [Sorting Formats](#sorting-formats) for more details.

You can select the n'th best format of a type by using `best<type>.<n>`. For example, `best.2` will select the 2nd best combined format. Similarly, `bv*.3` will select the 3rd best format that contains a video stream.
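For instance (an illustrative sketch using the example video ID from above):

```bash
# Select the 2nd-best combined format
$ yt-dlp -f "best.2" BaW_jenozKc

# Select the 3rd-best format that contains a video stream
$ yt-dlp -f "bv*.3" BaW_jenozKc
```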
If you want to download multiple videos, and they don't have the same formats available, you can specify the order of preference using slashes. Note that formats on the left hand side are preferred; e.g. `-f 22/17/18` will download format 22 if it's available, otherwise it will download format 17 if it's available, otherwise it will download format 18 if it's available, otherwise it will complain that no suitable formats are available for download.

If you want to download several formats of the same video, use a comma as a separator, e.g. `-f 22,17,18` will download all three of these formats, provided they are available. Or a more sophisticated example combined with the precedence feature: `-f 136/137/mp4/bestvideo,140/m4a/bestaudio`.

You can merge the video and audio of multiple formats into a single file using `-f <format1>+<format2>+...` (requires ffmpeg installed); e.g. `-f bestvideo+bestaudio` will download the best video-only format, the best audio-only format and mux them together with ffmpeg.

**Deprecation warning**: Since the *below* described behavior is complex and counter-intuitive, this will be removed and multistreams will be enabled by default in the future. A new operator will instead be added to limit formats to a single audio/video stream

Unless `--video-multistreams` is used, all formats with a video stream except the first one are ignored. Similarly, unless `--audio-multistreams` is used, all formats with an audio stream except the first one are ignored. E.g. `-f bestvideo+best+bestaudio --video-multistreams --audio-multistreams` will download and merge all 3 given formats. The resulting file will have 2 video streams and 2 audio streams. But `-f bestvideo+best+bestaudio --no-video-multistreams` will download and merge only `bestvideo` and `bestaudio`. `best` is ignored since another format containing a video stream (`bestvideo`) has already been selected. The order of the formats is therefore important. `-f best+bestaudio --no-audio-multistreams` will download only `best` while `-f bestaudio+best --no-audio-multistreams` will ignore `best` and download only `bestaudio`.
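To make the merging and multistream rules above concrete, here is a small hedged sketch (`URL` is a placeholder for any supported video):

```bash
# Merge the best video-only and the best audio-only format (requires ffmpeg)
$ yt-dlp -f "bestvideo+bestaudio" "URL"

# Keep the best two audio tracks alongside the video in one merged file
$ yt-dlp -f "bestvideo+bestaudio+bestaudio.2" --audio-multistreams "URL"
```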
## Filtering Formats

You can also filter the video formats by putting a condition in brackets, as in `-f "best[height=720]"` (or `-f "[filesize>10M]"` since filters without a selector are interpreted as `best`).

The following numeric meta fields can be used with comparisons `<`, `<=`, `>`, `>=`, `=` (equals), `!=` (not equals):

 - `filesize`: The number of bytes, if known in advance
 - `filesize_approx`: An estimate for the number of bytes
 - `width`: Width of the video, if known
 - `height`: Height of the video, if known
 - `aspect_ratio`: Aspect ratio of the video, if known
 - `tbr`: Average bitrate of audio and video in [kbps](## "1000 bits/sec")
 - `abr`: Average audio bitrate in [kbps](## "1000 bits/sec")
 - `vbr`: Average video bitrate in [kbps](## "1000 bits/sec")
 - `asr`: Audio sampling rate in Hertz
 - `fps`: Frame rate
 - `audio_channels`: The number of audio channels
 - `stretched_ratio`: `width:height` of the video's pixels, if not square

Filtering also works with the comparisons `=` (equals), `^=` (starts with), `$=` (ends with), `*=` (contains), `~=` (matches regex) and the following string meta fields:

 - `url`: Video URL
 - `ext`: File extension
 - `acodec`: Name of the audio codec in use
 - `vcodec`: Name of the video codec in use
 - `container`: Name of the container format
 - `protocol`: The protocol that will be used for the actual download, lower-case (`http`, `https`, `rtsp`, `rtmp`, `rtmpe`, `mms`, `f4m`, `ism`, `http_dash_segments`, `m3u8`, or `m3u8_native`)
 - `language`: Language code
 - `dynamic_range`: The dynamic range of the video
 - `format_id`: A short description of the format
 - `format`: A human-readable description of the format
 - `format_note`: Additional info about the format
 - `resolution`: Textual description of width and height

Any string comparison may be prefixed with negation `!` in order to produce an opposite comparison, e.g. `!*=` (does not contain).

The comparand of a string comparison needs to be quoted with either double or single quotes if it contains spaces or special characters other than `._-`.

**Note**: None of the aforementioned meta fields are guaranteed to be present since this solely depends on the metadata obtained by the particular extractor, i.e. the metadata offered by the website. Any other field made available by the extractor can also be used for filtering.

Formats for which the value is not known are excluded unless you put a question mark (`?`) after the operator. You can combine format filters, so `-f "bv[height<=?720][tbr>500]"` selects up to 720p videos (or videos where the height is not known) with a bitrate of at least 500 kbps. You can also use the filters with `all` to download all formats that satisfy the filter, e.g. `-f "all[vcodec=none]"` selects all audio-only formats.

Format selectors can also be grouped using parentheses; e.g. `-f "(mp4,webm)[height<480]"` will download the best pre-merged mp4 and webm formats with a height lower than 480.
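A couple of further filter sketches combining the pieces above (hedged illustrations; `URL` is a placeholder):

```bash
# Best audio-only format whose codec does not contain "opus"
$ yt-dlp -f "ba[acodec!*=opus]" "URL"

# Best video up to 720p (tolerating unknown heights) over an https-like protocol,
# merged with the best audio
$ yt-dlp -f "bv*[height<=?720][protocol~='^https?']+ba" "URL"
```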
## Sorting Formats

You can change the criteria for what is considered the `best` format by using `-S` (`--format-sort`). The general format for this is `--format-sort field1,field2...`.

The available fields are:

 - `hasvid`: Gives priority to formats that have a video stream
 - `hasaud`: Gives priority to formats that have an audio stream
 - `ie_pref`: The format preference
 - `lang`: The language preference
 - `quality`: The quality of the format
 - `source`: The preference of the source
 - `proto`: Protocol used for download (`https`/`ftps` > `http`/`ftp` > `m3u8_native`/`m3u8` > `http_dash_segments` > `websocket_frag` > `mms`/`rtsp` > `f4f`/`f4m`)
 - `vcodec`: Video Codec (`av01` > `vp9.2` > `vp9` > `h265` > `h264` > `vp8` > `h263` > `theora` > other)
 - `acodec`: Audio Codec (`flac`/`alac` > `wav`/`aiff` > `opus` > `vorbis` > `aac` > `mp4a` > `mp3` > `ac4` > `eac3` > `ac3` > `dts` > other)
 - `codec`: Equivalent to `vcodec,acodec`
 - `vext`: Video Extension (`mp4` > `mov` > `webm` > `flv` > other). If `--prefer-free-formats` is used, `webm` is preferred.
 - `aext`: Audio Extension (`m4a` > `aac` > `mp3` > `ogg` > `opus` > `webm` > other). If `--prefer-free-formats` is used, the order changes to `ogg` > `opus` > `webm` > `mp3` > `m4a` > `aac`
 - `ext`: Equivalent to `vext,aext`
 - `filesize`: Exact filesize, if known in advance
 - `fs_approx`: Approximate filesize
 - `size`: Exact filesize if available, otherwise approximate filesize
 - `height`: Height of video
 - `width`: Width of video
 - `res`: Video resolution, calculated as the smallest dimension
 - `fps`: Framerate of video
 - `hdr`: The dynamic range of the video (`DV` > `HDR12` > `HDR10+` > `HDR10` > `HLG` > `SDR`)
 - `channels`: The number of audio channels
 - `tbr`: Total average bitrate in [kbps](## "1000 bits/sec")
 - `vbr`: Average video bitrate in [kbps](## "1000 bits/sec")
 - `abr`: Average audio bitrate in [kbps](## "1000 bits/sec")
 - `br`: Average bitrate in [kbps](## "1000 bits/sec"), `tbr`/`vbr`/`abr`
 - `asr`: Audio sample rate in Hz

**Deprecation warning**: Many of these fields have (currently undocumented) aliases that may be removed in a future version. It is recommended to use only the documented field names.

All fields, unless specified otherwise, are sorted in descending order. To reverse this, prefix the field with a `+`. E.g. `+res` prefers the format with the smallest resolution. Additionally, you can suffix a preferred value for the fields, separated by a `:`. E.g. `res:720` prefers larger videos, but no larger than 720p, and the smallest video if there are no videos less than 720p.

For `codec` and `ext`, you can provide two preferred values, the first for video and the second for audio. E.g. `+codec:avc:m4a` (equivalent to `+vcodec:avc,+acodec:m4a`) sets the video codec preference to `h264` > `h265` > `vp9` > `vp9.2` > `av01` > `vp8` > `h263` > `theora` and audio codec preference to `mp4a` > `aac` > `vorbis` > `opus` > `mp3` > `ac3` > `dts`.

You can also make the sorting prefer the values nearest to the ones provided by using `~` as the delimiter. E.g. `filesize~1G` prefers the format with filesize closest to 1 GiB.

The fields `hasvid` and `ie_pref` are always given highest priority in sorting, irrespective of the user-defined order. This behavior can be changed by using `--format-sort-force`. Apart from these, the default order used is: `lang,quality,res,fps,hdr:12,vcodec:vp9.2,channels,acodec,size,br,asr,proto,ext,hasaud,source,id`. The extractors may override this default order, but they cannot override the user-provided order.

Note that the default has `vcodec:vp9.2`; i.e. `av1` is not preferred. Similarly, the default for hdr is `hdr:12`; i.e. Dolby Vision is not preferred.
These choices are made since DV and AV1 formats are not yet fully compatible with most devices. This may be changed in the future as more devices become capable of smoothly playing back these formats.

If your format selector is `worst`, the last item is selected after sorting. This means it will select the format that is worst in all respects. Most of the time, what you actually want is the video with the smallest filesize instead. So it is generally better to use `-f best -S +size,+br,+res,+fps`.

**Tip**: You can use `-v -F` to see how the formats have been sorted (worst to best).

## Format Selection examples

```bash
# Download and merge the best video-only format and the best audio-only format,
# or download the best combined format if video-only format is not available
$ yt-dlp -f "bv+ba/b"

# Download best format that contains video,
# and if it doesn't already have an audio stream, merge it with best audio-only format
$ yt-dlp -f "bv*+ba/b"

# Same as above
$ yt-dlp

# Download the best video-only format and the best audio-only format without merging them
# For this case, an output template should be used since
# by default, bestvideo and bestaudio will have the same file name.
$ yt-dlp -f "bv,ba" -o "%(title)s.f%(format_id)s.%(ext)s"

# Download and merge the best format that has a video stream,
# and all audio-only formats into one file
$ yt-dlp -f "bv*+mergeall[vcodec=none]" --audio-multistreams

# Download and merge the best format that has a video stream,
# and the best 2 audio-only formats into one file
$ yt-dlp -f "bv*+ba+ba.2" --audio-multistreams


# The following examples show the old method (without -S) of format selection
# and how to use -S to achieve a similar but (generally) better result

# Download the worst video available (old method)
$ yt-dlp -f "wv*+wa/w"

# Download the best video available but with the smallest resolution
$ yt-dlp -S "+res"

# Download the smallest video available
$ yt-dlp -S "+size,+br"


# Download the best mp4 video available, or the best video if no mp4 available
$ yt-dlp -f "bv*[ext=mp4]+ba[ext=m4a]/b[ext=mp4] / bv*+ba/b"

# Download the best video with the best extension
# (For video, mp4 > mov > webm > flv. For audio, m4a > aac > mp3 ...)
$ yt-dlp -S "ext"


# Download the best video available but no better than 480p,
# or the worst video if there is no video under 480p
$ yt-dlp -f "bv*[height<=480]+ba/b[height<=480] / wv*+ba/w"

# Download the best video available with the largest height but no better than 480p,
# or the best video with the smallest resolution if there is no video under 480p
$ yt-dlp -S "height:480"

# Download the best video available with the largest resolution but no better than 480p,
# or the best video with the smallest resolution if there is no video under 480p
# Resolution is determined by using the smallest dimension.
# So this works correctly for vertical videos as well
$ yt-dlp -S "res:480"


# Download the best video (that also has audio) but no bigger than 50 MB,
# or the worst video (that also has audio) if there is no video under 50 MB
$ yt-dlp -f "b[filesize<50M] / w"

# Download the largest video (that also has audio) but no bigger than 50 MB,
# or the smallest video (that also has audio) if there is no video under 50 MB
$ yt-dlp -f "b" -S "filesize:50M"

# Download the best video (that also has audio) that is closest in size to 50 MB
$ yt-dlp -f "b" -S "filesize~50M"


# Download best video available via direct link over HTTP/HTTPS protocol,
# or the best video available via any protocol if there is no such video
$ yt-dlp -f "(bv*+ba/b)[protocol^=http][protocol!*=dash] / (bv*+ba/b)"

# Download best video available via the best protocol
# (https/ftps > http/ftp > m3u8_native > m3u8 > http_dash_segments ...)
$ yt-dlp -S "proto"


# Download the best video with either h264 or h265 codec,
# or the best video if there is no such video
$ yt-dlp -f "(bv*[vcodec~='^((he|a)vc|h26[45])']+ba) / (bv*+ba/b)"

# Download the best video with best codec no better than h264,
# or the best video with worst codec if there is no such video
$ yt-dlp -S "codec:h264"

# Download the best video with worst codec no worse than h264,
# or the best video with best codec if there is no such video
$ yt-dlp -S "+codec:h264"

# More complex examples

# Download the best video no better than 720p preferring framerate greater than 30,
# or the worst video (still preferring framerate greater than 30) if there is no such video
$ yt-dlp -f "((bv*[fps>30]/bv*)[height<=720]/(wv*[fps>30]/wv*)) + ba / (b[fps>30]/b)[height<=720]/(w[fps>30]/w)"

# Download the video with the largest resolution no better than 720p,
# or the video with the smallest resolution available if there is no such video,
# preferring larger framerate for formats with the same resolution
$ yt-dlp -S "res:720,fps"

# Download the video with smallest resolution no worse than 480p,
# or the video with the largest resolution available if there is no such video,
# preferring better codec and then larger total bitrate for the same resolution
$ yt-dlp -S "+res:480,codec,br"
```

# MODIFYING METADATA

The metadata obtained by the extractors can be modified by using `--parse-metadata` and `--replace-in-metadata`.

`--replace-in-metadata FIELDS REGEX REPLACE` is used to replace text in any metadata field using a [Python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax). [Backreferences](https://docs.python.org/3/library/re.html?highlight=backreferences#re.sub) can be used in the replace string for advanced use.

The general syntax of `--parse-metadata FROM:TO` is to give the name of a field or an [output template](#output-template) to extract data from, and the format to interpret it as, separated by a colon `:`. Either a [Python regular expression](https://docs.python.org/3/library/re.html#regular-expression-syntax) with named capture groups, a single field name, or a similar syntax to the [output template](#output-template) (only `%(field)s` formatting is supported) can be used for `TO`. The option can be used multiple times to parse and modify various fields.

Note that these options preserve their relative order, allowing replacements to be made in parsed fields and vice versa. Also, any field thus created can be used in the [output template](#output-template) and will also affect the media file's metadata added when using `--embed-metadata`.
This option also has a few special uses:

* You can download an additional URL based on the metadata of the currently downloaded video. To do this, set the field `additional_urls` to the URL that you want to download. E.g. `--parse-metadata "description:(?P<additional_urls>https?://www\.vimeo\.com/\d+)"` will download the first vimeo video found in the description

* You can use this to change the metadata that is embedded in the media file. To do this, set the value of the corresponding field with a `meta_` prefix. For example, any value you set to the `meta_description` field will be added to the `description` field in the file - you can use this to set a different "description" and "synopsis". To modify the metadata of individual streams, use the `meta<n>_` prefix (e.g. `meta1_language`). Any value set to the `meta_` field will overwrite all default values.

**Note**: Metadata modification happens before format selection, post-extraction and other post-processing operations. Some fields may be added or changed during these steps, overriding your changes.

For reference, these are the fields yt-dlp adds by default to the file metadata:

Metadata fields            | From
:--------------------------|:------------------------------------------------
`title`                    | `track` or `title`
`date`                     | `upload_date`
`description`, `synopsis`  | `description`
`purl`, `comment`          | `webpage_url`
`track`                    | `track_number`
`artist`                   | `artist`, `artists`, `creator`, `creators`, `uploader` or `uploader_id`
`composer`                 | `composer` or `composers`
`genre`                    | `genre` or `genres`
`album`                    | `album`
`album_artist`             | `album_artist` or `album_artists`
`disc`                     | `disc_number`
`show`                     | `series`
`season_number`            | `season_number`
`episode_id`               | `episode` or `episode_id`
`episode_sort`             | `episode_number`
`language` of each stream  | the format's `language`

**Note**: The file format may not support some of these fields

## Modifying metadata examples

```bash
# Interpret the title as "Artist - Title"
$ yt-dlp --parse-metadata "title:%(artist)s - %(title)s"

# Regex example
$ yt-dlp --parse-metadata "description:Artist - (?P<artist>.+)"

# Set title as "Series name S01E05"
$ yt-dlp --parse-metadata "%(series)s S%(season_number)02dE%(episode_number)02d:%(title)s"

# Prioritize uploader as the "artist" field in video metadata
$ yt-dlp --parse-metadata "%(uploader|)s:%(meta_artist)s" --embed-metadata

# Set "comment" field in video metadata using description instead of webpage_url,
# handling multiple lines correctly
$ yt-dlp --parse-metadata "description:(?s)(?P<meta_comment>.+)" --embed-metadata

# Do not set any "synopsis" in the video metadata
$ yt-dlp --parse-metadata ":(?P<meta_synopsis>)"

# Remove "formats" field from the infojson by setting it to an empty string
$ yt-dlp --parse-metadata "video::(?P<formats>)" --write-info-json

# Replace all spaces and "_" in title and uploader with a `-`
$ yt-dlp --replace-in-metadata "title,uploader" "[ _]" "-"
```

# EXTRACTOR ARGUMENTS

Some extractors accept additional arguments which can be passed using `--extractor-args KEY:ARGS`. `ARGS` is a `;` (semicolon) separated string of `ARG=VAL1,VAL2`. E.g. `--extractor-args "youtube:player-client=mediaconnect,web;formats=incomplete" --extractor-args "funimation:version=uncut"`

Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client` becomes `youtube:player_client`

The following extractors use this feature:

#### youtube
* `lang`: Prefer translated metadata (`title`, `description` etc.) of this language code (case-sensitive).
By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for the list of supported content language codes
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
* `player_client`: Clients to extract video data from. The main clients are `web`, `ios` and `android`, with variants `_music` and `_creator` (e.g. `ios_creator`); and `mediaconnect`, `mweb`, `android_producer`, `android_testsuite`, `android_vr`, `web_safari`, `web_embedded`, `tv` and `tv_embedded` with no variants. By default, `ios,web_creator` is used, and `tv_embedded`, `web_creator` and `mediaconnect` are added as required for age-gated videos. Similarly, the music variants are added for `music.youtube.com` URLs. Most `android` clients will be given lowest priority since their formats are broken. You can use `all` to use all the clients, and `default` for the default clients. You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=all,-web`
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause some issues. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) for more details
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp
* `comment_sort`: `top` or `new` (default) - choose comment sorting mode (on YouTube's side)
* `max_comments`: Limit the amount of comments to gather. Comma-separated list of integers representing `max-comments,max-parents,max-replies,max-replies-per-thread`. Default is `all,all,all,all`
    * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
* `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
* `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
* `innertube_key`: Innertube API key to use for all API requests. By default, no API key is used
* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
* `data_sync_id`: Overrides the account Data Sync ID used in Innertube API requests. This may be needed if you are using an account with `youtube:player_skip=webpage,configs` or `youtubetab:skip=webpage`
* `visitor_data`: Overrides the Visitor Data used in Innertube API requests. This should be used with `player_skip=webpage,configs` and without cookies. Note: this may have adverse effects if used improperly. If a session from a browser is wanted, you should pass cookies instead (which contain the Visitor ID)
* `po_token`: Proof of Origin (PO) Token(s) to use for requesting video playback.
Comma-separated list of PO Tokens in the format `CLIENT+PO_TOKEN`, e.g. `youtube:po_token=web+XXX,android+YYY`

#### youtubetab (YouTube playlists, channels, feeds, etc.)
* `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior; see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
* `approximate_date`: Extract approximate `upload_date` and `timestamp` in flat-playlist. This may cause date-based filters to be slightly off

#### generic
* `fragment_query`: Passthrough any query in mpd/m3u8 manifest URLs to their fragments if no value is provided, or else apply the query string given as `fragment_query=VALUE`. Note that if the stream has an HLS AES-128 key, then the query parameters will be passed to the key URI as well, unless the `key_query` extractor-arg is passed, or unless an external key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg
* `variant_query`: Passthrough the master m3u8 URL query to its variant playlist URLs if no value is provided, or else apply the query string given as `variant_query=VALUE`
* `key_query`: Passthrough the master m3u8 URL query to its HLS AES-128 decryption key URI if no value is provided, or else apply the query string given as `key_query=VALUE`. Note that this will have no effect if the key URI is provided via the `hls_key` extractor-arg. Does not apply to ffmpeg
* `hls_key`: An HLS AES-128 key URI *or* key (as hex), and optionally the IV (as hex), in the form of `(URI|KEY)[,IV]`; e.g. `generic:hls_key=ABCDEF1234567980,0xFEDCBA0987654321`. Passing any of these values will force usage of the native HLS downloader and override the corresponding values found in the m3u8 playlist
* `is_live`: Bypass live HLS detection and manually set `live_status` - a value of `false` will set `not_live`, any other value (or no value) will set `is_live`

#### funimation
* `language`: Audio languages to extract, e.g. `funimation:language=english,japanese`
* `version`: The video version to extract - `uncut` or `simulcast`

#### crunchyrollbeta (Crunchyroll)
* `hardsub`: One or more hardsub versions to extract (in order of preference), or `all` (default: `None` = no hardsubs will be extracted), e.g. `crunchyrollbeta:hardsub=en-US,de-DE`

#### vikichannel
* `video_types`: Types of videos to download - one or more of `episodes`, `movies`, `clips`, `trailers`

#### niconico
* `segment_duration`: Segment duration in milliseconds for HLS-DMC formats. Use it at your own risk since this feature **may result in your account termination.**

#### youtubewebarchive
* `check_all`: Try to check more at the cost of more requests. One or more of `thumbnails`, `captures`

#### gamejolt
* `comment_sort`: `hot` (default), `you` (cookies needed), `top`, `new` - choose comment sorting mode (on GameJolt's side)

#### hotstar
* `res`: resolution to ignore - one or more of `sd`, `hd`, `fhd`
* `vcodec`: vcodec to ignore - one or more of `h264`, `h265`, `dvh265`
* `dr`: dynamic range to ignore - one or more of `sdr`, `hdr10`, `dv`

#### niconicochannelplus
* `max_comments`: Maximum number of comments to extract - default is `120`

#### tiktok
* `api_hostname`: Hostname to use for mobile API calls, e.g. `api22-normal-c-alisg.tiktokv.com`
* `app_name`: Default app name to use with mobile API calls, e.g.
`trill`
* `app_version`: Default app version to use with mobile API calls - should be set along with `manifest_app_version`, e.g. `34.1.2`
* `manifest_app_version`: Default numeric app version to use with mobile API calls, e.g. `2023401020`
* `aid`: Default app ID to use with mobile API calls, e.g. `1180`
* `app_info`: Enable mobile API extraction with one or more app info strings in the format of `<iid>/[app_name]/[app_version]/[manifest_app_version]/[aid]`, where `iid` is the unique app install ID. `iid` is the only required value; all other values and their `/` separators can be omitted, e.g. `tiktok:app_info=1234567890123456789` or `tiktok:app_info=123,456/trill///1180,789//34.0.1/340001`
* `device_id`: Enable mobile API extraction with a genuine device ID to be used with mobile API calls. Default is a random 19-digit string

#### rokfinchannel
* `tab`: Which tab to download - one of `new`, `top`, `videos`, `podcasts`, `streams`, `stacks`

#### twitter
* `api`: Select one of `graphql` (default), `legacy` or `syndication` as the API for tweet extraction. Has no effect if logged in

#### stacommu, wrestleuniverse
* `device_id`: UUID value assigned by the website and used to enforce device limits for paid livestream content. Can be found in browser local storage

#### twitch
* `client_id`: Client ID value to be sent with GraphQL requests, e.g. `twitch:client_id=kimne78kx3ncx6brgo4mv6wki5h1ko`

#### nhkradirulive (NHK らじる★らじる LIVE)
* `area`: Which regional variation to extract. Valid areas are: `sapporo`, `sendai`, `tokyo`, `nagoya`, `osaka`, `hiroshima`, `matsuyama`, `fukuoka`. Defaults to `tokyo`

#### nflplusreplay
* `type`: Type(s) of game replays to extract. Valid types are: `full_game`, `full_game_spanish`, `condensed_game` and `all_22`. You can use `all` to extract all available replay types, which is the default

#### jiocinema
* `refresh_token`: The `refreshToken` UUID from browser local storage can be passed to extend the life of your login session when logging in with `token` as username and the `accessToken` from browser local storage as password

#### jiosaavn
* `bitrate`: Audio bitrates to request. One or more of `16`, `32`, `64`, `128`, `320`. Default is `128,320`

#### afreecatvlive
* `cdn`: One or more CDN IDs to use with the API call for stream URLs, e.g. `gcp_cdn`, `gs_cdn_pc_app`, `gs_cdn_mobile_web`, `gs_cdn_pc_web`

#### soundcloud
* `formats`: Formats to request from the API. Requested values should be in the format of `{protocol}_{extension}` (omitting the bitrate), e.g. `hls_opus,http_aac`. The `*` character functions as a wildcard, e.g. `*_mp3`, and can be passed by itself to request all formats. Known protocols include `http`, `hls` and `hls-aes`; known extensions include `aac`, `opus` and `mp3`. Original `download` formats are always extracted. Default is `http_aac,hls_aac,http_opus,hls_opus,http_mp3,hls_mp3`

#### orfon (orf:on)
* `prefer_segments_playlist`: Prefer a playlist of program segments instead of a single complete video when available. If individual segments are desired, use `--concat-playlist never --extractor-args "orfon:prefer_segments_playlist"`

#### bilibili
* `prefer_multi_flv`: Prefer extracting flv formats over mp4 for older videos that still provide legacy formats

#### digitalconcerthall
* `prefer_combined_hls`: Prefer extracting combined/pre-merged video and audio HLS formats.
This will exclude 4K/HEVC video and lossless/FLAC audio formats, which are only available as split video/audio HLS formats

**Note**: These options may be changed/removed in the future without concern for backward compatibility

<!-- MANPAGE: MOVE "INSTALLATION" SECTION HERE -->

# PLUGINS

Note that **all** plugins are imported even if not invoked, and that **there are no checks** performed on plugin code. **Use plugins at your own risk and only if you trust the code!**

Plugins can be of `<type>`s `extractor` or `postprocessor`.
- Extractor plugins do not need to be enabled from the CLI and are automatically invoked when the input URL is suitable for it.
- Extractor plugins take priority over built-in extractors.
- Postprocessor plugins can be invoked using `--use-postprocessor NAME`.

Plugins are loaded from the namespace packages `yt_dlp_plugins.extractor` and `yt_dlp_plugins.postprocessor`. In other words, the file structure on the disk looks something like:

    yt_dlp_plugins/
        extractor/
            myplugin.py
        postprocessor/
            myplugin.py

yt-dlp looks for these `yt_dlp_plugins` namespace folders in many locations (see below) and loads in plugins from **all** of them.

See the [wiki for some known plugins](https://github.com/yt-dlp/yt-dlp/wiki/Plugins)

## Installing Plugins

Plugins can be installed using various methods and locations.

1. **Configuration directories**: Plugin packages (containing a `yt_dlp_plugins` namespace folder) can be dropped into the following standard [configuration locations](#configuration):
    * **User Plugins**
        * `${XDG_CONFIG_HOME}/yt-dlp/plugins/<package name>/yt_dlp_plugins/` (recommended on Linux/macOS)
        * `${XDG_CONFIG_HOME}/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
        * `${APPDATA}/yt-dlp/plugins/<package name>/yt_dlp_plugins/` (recommended on Windows)
        * `${APPDATA}/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
        * `~/.yt-dlp/plugins/<package name>/yt_dlp_plugins/`
        * `~/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
    * **System Plugins**
        * `/etc/yt-dlp/plugins/<package name>/yt_dlp_plugins/`
        * `/etc/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
2. **Executable location**: Plugin packages can similarly be installed in a `yt-dlp-plugins` directory under the executable location (recommended for portable installations):
    * Binary: where `<root-dir>/yt-dlp.exe`, `<root-dir>/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
    * Source: where `<root-dir>/yt_dlp/__main__.py`, `<root-dir>/yt-dlp-plugins/<package name>/yt_dlp_plugins/`
3. **pip and other locations in `PYTHONPATH`**
    * Plugin packages can be installed and managed using `pip`. See [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) for an example.
    * Note: plugin files between plugin packages installed with pip must have unique filenames.
    * Any path in `PYTHONPATH` is searched for the `yt_dlp_plugins` namespace folder.
    * Note: This does not apply for Pyinstaller/py2exe builds.

`.zip`, `.egg` and `.whl` archives containing a `yt_dlp_plugins` namespace folder in their root are also supported as plugin packages.
* e.g. `${XDG_CONFIG_HOME}/yt-dlp/plugins/mypluginpkg.zip` where `mypluginpkg.zip` contains `yt_dlp_plugins/<type>/myplugin.py`

Run yt-dlp with `--verbose` to check if the plugin has been loaded.

## Developing Plugins

See the [yt-dlp-sample-plugins](https://github.com/yt-dlp/yt-dlp-sample-plugins) repo for a template plugin package and the [Plugin Development](https://github.com/yt-dlp/yt-dlp/wiki/Plugin-Development) section of the wiki for a plugin development guide.
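For orientation, a minimal extractor plugin might look something like the following sketch (the domain, regex and class name are made-up placeholders; consult the sample-plugins repo above for the authoritative layout):

```python
# yt_dlp_plugins/extractor/myplugin.py -- hypothetical minimal extractor plugin
from yt_dlp.extractor.common import InfoExtractor


class SamplePluginIE(InfoExtractor):
    # Placeholder URL pattern; the <id> group is extracted by _match_id below
    _VALID_URL = r'https?://(?:www\.)?example\.com/watch/(?P<id>\d+)'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        # Return the standard info dict; real extractors usually fill in many more fields
        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'url': self._og_search_video_url(webpage),
        }
```

Dropping such a file into any of the plugin locations listed above should make yt-dlp pick it up automatically; running with `--verbose` will confirm whether it was loaded.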
All public classes with a name ending in `IE`/`PP` are imported from each file for extractors and postprocessors respectively. This respects the underscore prefix (e.g. `_MyBasePluginIE` is private) and `__all__`. Modules can similarly be excluded by prefixing the module name with an underscore (e.g. `_myplugin.py`).

To replace an existing extractor with a subclass of one, set the `plugin_name` class keyword argument (e.g. `class MyPluginIE(ABuiltInIE, plugin_name='myplugin')` will replace `ABuiltInIE` with `MyPluginIE`). Since the extractor replaces the parent, you should exclude the subclass extractor from being imported separately by making it private using one of the methods described above.

If you are a plugin author, add [yt-dlp-plugins](https://github.com/topics/yt-dlp-plugins) as a topic to your repository for discoverability.

See the [Developer Instructions](https://github.com/yt-dlp/yt-dlp/blob/master/CONTRIBUTING.md#developer-instructions) on how to write and test an extractor.

# EMBEDDING YT-DLP

yt-dlp makes the best effort to be a good command-line program, and thus should be callable from any programming language.

Your program should avoid parsing the normal stdout since it may change in future versions. Instead, you should use options such as `-J`, `--print`, `--progress-template`, `--exec` etc. to create console output that you can reliably reproduce and parse.

From a Python program, you can embed yt-dlp in a more powerful fashion, like this:

```python
from yt_dlp import YoutubeDL

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']
with YoutubeDL() as ydl:
    ydl.download(URLS)
```

Most likely, you'll want to use various options. For a list of options available, have a look at [`yt_dlp/YoutubeDL.py`](yt_dlp/YoutubeDL.py#L183) or `help(yt_dlp.YoutubeDL)` in a Python shell. If you are already familiar with the CLI, you can use [`devscripts/cli_to_api.py`](https://github.com/yt-dlp/yt-dlp/blob/master/devscripts/cli_to_api.py) to translate any CLI switches to `YoutubeDL` params.

**Tip**: If you are porting your code from youtube-dl to yt-dlp, one important point to look out for is that we do not guarantee the return value of `YoutubeDL.extract_info` to be json serializable, or even be a dictionary.
It will be dictionary-like, but if you want to ensure it is a serializable dictionary, pass it through `YoutubeDL.sanitize_info` as shown in the [example below](#extracting-information)

## Embedding examples

#### Extracting information

```python
import json

import yt_dlp

URL = 'https://www.youtube.com/watch?v=BaW_jenozKc'

# ℹ️ See help(yt_dlp.YoutubeDL) for a list of available options and public functions
ydl_opts = {}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info(URL, download=False)

    # ℹ️ ydl.sanitize_info makes the info json-serializable
    print(json.dumps(ydl.sanitize_info(info)))
```

#### Download using an info-json

```python
import yt_dlp

INFO_FILE = 'path/to/video.info.json'

with yt_dlp.YoutubeDL() as ydl:
    error_code = ydl.download_with_info_file(INFO_FILE)

print('Some videos failed to download' if error_code
      else 'All videos successfully downloaded')
```

#### Extract audio

```python
import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']

ydl_opts = {
    'format': 'm4a/bestaudio/best',
    # ℹ️ See help(yt_dlp.postprocessor) for a list of available Postprocessors and their arguments
    'postprocessors': [{  # Extract audio using ffmpeg
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'm4a',
    }]
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    error_code = ydl.download(URLS)
```

#### Filter videos

```python
import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']

def longer_than_a_minute(info, *, incomplete):
    """Download only videos longer than a minute (or with unknown duration)"""
    duration = info.get('duration')
    if duration and duration < 60:
        return 'The video is too short'

ydl_opts = {
    'match_filter': longer_than_a_minute,
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    error_code = ydl.download(URLS)
```

#### Adding logger and progress hook

```python
import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']

class MyLogger:
    def debug(self, msg):
        # For compatibility with youtube-dl, both debug and info are passed into debug
        # You can distinguish them by the prefix '[debug] '
        if msg.startswith('[debug] '):
            pass
        else:
            self.info(msg)

    def info(self, msg):
        pass

    def warning(self, msg):
        pass

    def error(self, msg):
        print(msg)


# ℹ️ See "progress_hooks" in help(yt_dlp.YoutubeDL)
def my_hook(d):
    if d['status'] == 'finished':
        print('Done downloading, now post-processing ...')


ydl_opts = {
    'logger': MyLogger(),
    'progress_hooks': [my_hook],
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(URLS)
```

#### Add a custom PostProcessor

```python
import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']

# ℹ️ See help(yt_dlp.postprocessor.PostProcessor)
class MyCustomPP(yt_dlp.postprocessor.PostProcessor):
    def run(self, info):
        self.to_screen('Doing stuff')
        return [], info


with yt_dlp.YoutubeDL() as ydl:
    # ℹ️ "when" can take any value in yt_dlp.utils.POSTPROCESS_WHEN
    ydl.add_post_processor(MyCustomPP(), when='pre_process')
    ydl.download(URLS)
```

#### Use a custom format selector

```python
import yt_dlp

URLS = ['https://www.youtube.com/watch?v=BaW_jenozKc']

def format_selector(ctx):
    """ Select the best video and the best audio that won't result in an mkv.
    NOTE: This is just an example and does not handle all cases """

    # formats are already sorted worst to best
    formats = ctx.get('formats')[::-1]

    # acodec='none' means there is no audio
    best_video = next(f for f in formats
                      if f['vcodec'] != 'none' and f['acodec'] == 'none')

    # find compatible audio extension
    audio_ext = {'mp4': 'm4a', 'webm': 'webm'}[best_video['ext']]
    # vcodec='none' means there is no video
    best_audio = next(f for f in formats if (
        f['acodec'] != 'none' and f['vcodec'] == 'none' and f['ext'] == audio_ext))

    # These are the minimum required fields for a merged format
    yield {
        'format_id': f'{best_video["format_id"]}+{best_audio["format_id"]}',
        'ext': best_video['ext'],
        'requested_formats': [best_video, best_audio],
        # Must be + separated list of protocols
        'protocol': f'{best_video["protocol"]}+{best_audio["protocol"]}'
    }


ydl_opts = {
    'format': format_selector,
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download(URLS)
```

# CHANGES FROM YOUTUBE-DL

### New features

* Forked from [**yt-dlc@f9401f2**](https://github.com/blackjack4494/yt-dlc/commit/f9401f2a91987068139c5f757b12fc711d4c0cee) and merged with [**youtube-dl@a08f2b7**](https://github.com/ytdl-org/youtube-dl/commit/a08f2b7e4567cdc50c0614ee0a4ffdff49b8b6e6) ([exceptions](https://github.com/yt-dlp/yt-dlp/issues/21))

* **[SponsorBlock Integration](#sponsorblock-options)**: You can mark/remove sponsor sections in YouTube videos by utilizing the [SponsorBlock](https://sponsor.ajay.app) API

* **[Format Sorting](#sorting-formats)**: The default format sorting options have been changed so that higher resolution and better codecs will now be preferred instead of simply using larger bitrate. Furthermore, you can now specify the sort order using `-S`. This allows for much easier format selection than what is possible by simply using `--format` ([examples](#format-selection-examples))

* **Merged with animelover1984/youtube-dl**: You get most of the features and improvements from [animelover1984/youtube-dl](https://github.com/animelover1984/youtube-dl) including `--write-comments`, `BiliBiliSearch`, `BilibiliChannel`, embedding thumbnail in mp4/ogg/opus, playlist infojson etc. Note that NicoNico livestreams are not available. See [#31](https://github.com/yt-dlp/yt-dlp/pull/31) for details.

* **YouTube improvements**:
    * Supports Clips, Stories (`ytstories:<channel UCID>`), Search (including filters)**\***, YouTube Music Search, Channel-specific search, Search prefixes (`ytsearch:`, `ytsearchdate:`)**\***, Mixes, and Feeds (`:ytfav`, `:ytwatchlater`, `:ytsubs`, `:ythistory`, `:ytrec`, `:ytnotif`)
    * Fix for [n-sig based throttling](https://github.com/ytdl-org/youtube-dl/issues/29326) **\***
    * Supports some (but not all) age-gated content without cookies
    * Download livestreams from the start using `--live-from-start` (*experimental*)
    * Channel URLs download all uploads of the channel, including shorts and live

* **Cookies from browser**: Cookies can be automatically extracted from all major web browsers using `--cookies-from-browser BROWSER[+KEYRING][:PROFILE][::CONTAINER]`

* **Download time range**: Videos can be downloaded partially based on either timestamps or chapters using `--download-sections`

* **Split video by chapters**: Videos can be split into multiple files based on chapters using `--split-chapters`

* **Multi-threaded fragment downloads**: Download multiple fragments of m3u8/mpd videos in parallel.
Use the `--concurrent-fragments` (`-N`) option to set the number of threads used

* **Aria2c with HLS/DASH**: You can use `aria2c` as the external downloader for DASH(mpd) and HLS(m3u8) formats

* **New and fixed extractors**: Many new extractors have been added and a lot of existing ones have been fixed. See the [changelog](Changelog.md) or the [list of supported sites](supportedsites.md)

* **New MSOs**: Philo, Spectrum, SlingTV, Cablevision, RCN etc.

* **Subtitle extraction from manifests**: Subtitles can be extracted from streaming media manifests. See [commit/be6202f](https://github.com/yt-dlp/yt-dlp/commit/be6202f12b97858b9d716e608394b51065d0419f) for details

* **Multiple paths and output templates**: You can give different [output templates](#output-template) and download paths for different types of files. You can also set a temporary path where intermediary files are downloaded to using `--paths` (`-P`)

* **Portable Configuration**: Configuration files are automatically loaded from the home and root directories. See [CONFIGURATION](#configuration) for details

* **Output template improvements**: Output templates can now have date-time formatting, numeric offsets, object traversal etc. See [output template](#output-template) for details. Even more advanced operations can also be done with the help of `--parse-metadata` and `--replace-in-metadata`

* **Other new options**: Many new options have been added such as `--alias`, `--print`, `--concat-playlist`, `--wait-for-video`, `--retry-sleep`, `--sleep-requests`, `--convert-thumbnails`, `--force-download-archive`, `--force-overwrites`, `--break-match-filters` etc.

* **Improvements**: Regex and other operators in `--format`/`--match-filters`, multiple `--postprocessor-args` and `--downloader-args`, faster archive checking, more [format selection options](#format-selection), merge multi-video/audio, multiple `--config-locations`, `--exec` at different stages, etc.

* **Plugins**: Extractors and PostProcessors can be loaded from an external file. See [plugins](#plugins) for details

* **Self updater**: The releases can be updated using `yt-dlp -U`, and downgraded using `--update-to` if required

* **Automated builds**: [Nightly/master builds](#update-channels) can be used with `--update-to nightly` and `--update-to master`

See the [changelog](Changelog.md) or [commits](https://github.com/yt-dlp/yt-dlp/commits) for the full list of changes

Features marked with a **\*** have been back-ported to youtube-dl

### Differences in default behavior

Some of yt-dlp's default options are different from those of youtube-dl and youtube-dlc:

* yt-dlp supports only [Python 3.8+](## "Windows 7"), and *may* remove support for more versions as they [become EOL](https://devguide.python.org/versions/#python-release-cycle); while [youtube-dl still supports Python 2.6+ and 3.2+](https://github.com/ytdl-org/youtube-dl/issues/30568#issue-1118238743)
* The options `--auto-number` (`-A`), `--title` (`-t`) and `--literal` (`-l`) no longer work. See [removed options](#Removed) for details
* `avconv` is not supported as an alternative to `ffmpeg`
* yt-dlp stores config files in slightly different locations to youtube-dl. See [CONFIGURATION](#configuration) for a list of correct locations
* The default [output template](#output-template) is `%(title)s [%(id)s].%(ext)s`. There is no real reason for this change. This was changed before yt-dlp was ever made public and now there are no plans to change it back to `%(title)s-%(id)s.%(ext)s`.
Instead, you may use `--compat-options filename`
* The default [format sorting](#sorting-formats) is different from youtube-dl and prefers higher resolution and better codecs rather than higher bitrates. You can use the `--format-sort` option to change this to any order you prefer, or use `--compat-options format-sort` to use youtube-dl's sorting order
* The default format selector is `bv*+ba/b`. This means that if a combined video + audio format that is better than the best video-only format is found, the former will be preferred. Use `-f bv+ba/b` or `--compat-options format-spec` to revert this
* Unlike youtube-dlc, yt-dlp does not allow merging multiple audio/video streams into one file by default (since this conflicts with the use of `-f bv*+ba`). If needed, this feature must be enabled using `--audio-multistreams` and `--video-multistreams`. You can also use `--compat-options multistreams` to enable both
* `--no-abort-on-error` is enabled by default. Use `--abort-on-error` or `--compat-options abort-on-error` to abort on errors instead
* When writing metadata files such as thumbnails, description or infojson, the same information (if available) is also written for playlists. Use `--no-write-playlist-metafiles` or `--compat-options no-playlist-metafiles` to not write these files
* `--add-metadata` attaches the `infojson` to `mkv` files in addition to writing the metadata when used with `--write-info-json`. Use `--no-embed-info-json` or `--compat-options no-attach-info-json` to revert this
* Some metadata are embedded into different fields when using `--add-metadata` as compared to youtube-dl. Most notably, the `comment` field contains the `webpage_url` and `synopsis` contains the `description`. You can [use `--parse-metadata`](#modifying-metadata) to modify this to your liking or use `--compat-options embed-metadata` to revert this
* `playlist_index` behaves differently when used with options like `--playlist-reverse` and `--playlist-items`. See [#302](https://github.com/yt-dlp/yt-dlp/issues/302) for details. You can use `--compat-options playlist-index` if you want to keep the earlier behavior
* The output of `-F` is listed in a new format. Use `--compat-options list-formats` to revert this
* Live chats (if available) are considered as subtitles. Use `--sub-langs all,-live_chat` to download all subtitles except live chat. You can also use `--compat-options no-live-chat` to prevent any live chat/danmaku from downloading
* YouTube channel URLs download all uploads of the channel. To download only the videos in a specific tab, pass the tab's URL. If the channel does not show the requested tab, an error will be raised. Also, `/live` URLs raise an error if there are no live videos instead of silently downloading the entire channel. You may use `--compat-options no-youtube-channel-redirect` to revert all these redirections
* Unavailable videos are also listed for YouTube playlists. Use `--compat-options no-youtube-unavailable-videos` to remove this
* The upload dates extracted from YouTube are in UTC [when available](https://github.com/yt-dlp/yt-dlp/blob/89e4d86171c7b7c997c77d4714542e0383bf0db0/yt_dlp/extractor/youtube.py#L3898-L3900). Use `--compat-options no-youtube-prefer-utc-upload-date` to prefer the non-UTC upload date
* If `ffmpeg` is used as the downloader, the downloading and merging of formats happen in a single step when possible. Use `--compat-options no-direct-merge` to revert this
* Thumbnail embedding in `mp4` is done with mutagen if possible.
For ease of use, a few more compat options are available:

* `--compat-options all`: Use all compat options (**Do NOT use this!**)
* `--compat-options youtube-dl`: Same as `--compat-options all,-multistreams,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext`
* `--compat-options youtube-dlc`: Same as `--compat-options all,-no-live-chat,-no-youtube-channel-redirect,-playlist-match-filter,-manifest-filesize-approx,-allow-unsafe-ext`
* `--compat-options 2021`: Same as `--compat-options 2022,no-certifi,filename-sanitization,no-youtube-prefer-utc-upload-date`
* `--compat-options 2022`: Same as `--compat-options 2023,playlist-match-filter,no-external-downloader-progress,prefer-legacy-http-handler,manifest-filesize-approx`
* `--compat-options 2023`: Currently does nothing. Use this to enable all future compat options
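As the `youtube-dl`/`youtube-dlc` entries above show, compat option sets can be combined, and individual names can be subtracted with a `-` prefix. A sketch:

```
# Emulate youtube-dl's defaults, but keep yt-dlp's live-chat handling
yt-dlp --compat-options youtube-dl,-no-live-chat URL
```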
The following compat options restore vulnerable behavior from before security patches:

* `--compat-options allow-unsafe-ext`: Allow files with any extension (including unsafe ones) to be downloaded ([GHSA-79w7-vh3h-8g4j](<https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j>))

> :warning: Only use if a valid file download is rejected because its extension is detected as uncommon
>
> **This option can enable remote code execution! Consider [opening an issue](<https://github.com/yt-dlp/yt-dlp/issues/new/choose>) instead!**

### Deprecated options

These are all the deprecated options and the current alternative to achieve the same effect

#### Almost redundant options
While these options are almost the same as their new counterparts, there are some differences that prevent them from being redundant

    -j, --dump-json                  --print "%()j"
    -F, --list-formats               --print formats_table
    --list-thumbnails                --print thumbnails_table --print playlist:thumbnails_table
    --list-subs                      --print automatic_captions_table --print subtitles_table

#### Redundant options
While these options are redundant, they are still expected to be used due to their ease of use

    --get-description                --print description
    --get-duration                   --print duration_string
    --get-filename                   --print filename
    --get-format                     --print format
    --get-id                         --print id
    --get-thumbnail                  --print thumbnail
    -e, --get-title                  --print title
    -g, --get-url                    --print urls
    --match-title REGEX              --match-filters "title ~= (?i)REGEX"
    --reject-title REGEX             --match-filters "title !~= (?i)REGEX"
    --min-views COUNT                --match-filters "view_count >=? COUNT"
    --max-views COUNT                --match-filters "view_count <=? COUNT"
    --break-on-reject                Use --break-match-filters
    --user-agent UA                  --add-header "User-Agent:UA"
    --referer URL                    --add-header "Referer:URL"
    --playlist-start NUMBER          -I NUMBER:
    --playlist-end NUMBER            -I :NUMBER
    --playlist-reverse               -I ::-1
    --no-playlist-reverse            Default
    --no-colors                      --color no_color
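For example, several of the old `--get-*` flags collapse into repeated `--print` calls, as the table above shows (`URL` is a placeholder):

```
# Old: yt-dlp -e --get-duration --get-filename URL
yt-dlp --print title --print duration_string --print filename URL
```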
#### Not recommended
While these options still work, their use is not recommended since there are other alternatives to achieve the same

    --force-generic-extractor       --ies generic,default
    --exec-before-download CMD      --exec "before_dl:CMD"
    --no-exec-before-download       --no-exec
    --all-formats                   -f all
    --all-subs                      --sub-langs all --write-subs
    --print-json                    -j --no-simulate
    --autonumber-size NUMBER        Use string formatting, e.g. %(autonumber)03d
    --autonumber-start NUMBER       Use internal field formatting like %(autonumber+NUMBER)s
    --id                            -o "%(id)s.%(ext)s"
    --metadata-from-title FORMAT    --parse-metadata "%(title)s:FORMAT"
    --hls-prefer-native             --downloader "m3u8:native"
    --hls-prefer-ffmpeg             --downloader "m3u8:ffmpeg"
    --list-formats-old              --compat-options list-formats (Alias: --no-list-formats-as-table)
    --list-formats-as-table         --compat-options -list-formats [Default] (Alias: --no-list-formats-old)
    --youtube-skip-dash-manifest    --extractor-args "youtube:skip=dash" (Alias: --no-youtube-include-dash-manifest)
    --youtube-skip-hls-manifest     --extractor-args "youtube:skip=hls" (Alias: --no-youtube-include-hls-manifest)
    --youtube-include-dash-manifest Default (Alias: --no-youtube-skip-dash-manifest)
    --youtube-include-hls-manifest  Default (Alias: --no-youtube-skip-hls-manifest)
    --geo-bypass                    --xff "default"
    --no-geo-bypass                 --xff "never"
    --geo-bypass-country CODE       --xff CODE
    --geo-bypass-ip-block IP_BLOCK  --xff IP_BLOCK

#### Developer options
These options are not intended to be used by the end-user

    --test                           Download only part of video for testing extractors
    --load-pages                     Load pages dumped by --write-pages
    --youtube-print-sig-code         For testing youtube signatures
    --allow-unplayable-formats       List unplayable formats also
    --no-allow-unplayable-formats    Default

#### Old aliases
These are aliases that are no longer documented for various reasons

    --avconv-location                --ffmpeg-location
    --clean-infojson                 --clean-info-json
    --cn-verification-proxy URL      --geo-verification-proxy URL
    --dump-headers                   --print-traffic
    --dump-intermediate-pages        --dump-pages
    --force-write-download-archive   --force-write-archive
    --load-info                      --load-info-json
    --no-clean-infojson              --no-clean-info-json
    --no-split-tracks                --no-split-chapters
    --no-write-srt                   --no-write-subs
    --prefer-unsecure                --prefer-insecure
    --rate-limit RATE                --limit-rate RATE
    --split-tracks                   --split-chapters
    --srt-lang LANGS                 --sub-langs LANGS
    --trim-file-names LENGTH         --trim-filenames LENGTH
    --write-srt                      --write-subs
    --yes-overwrites                 --force-overwrites

#### Sponskrub Options
Support for [SponSkrub](https://github.com/faissaloo/SponSkrub) has been deprecated in favor of the `--sponsorblock` options

    --sponskrub                      --sponsorblock-mark all
    --no-sponskrub                   --no-sponsorblock
    --sponskrub-cut                  --sponsorblock-remove all
    --no-sponskrub-cut               --sponsorblock-remove -all
    --sponskrub-force                Not applicable
    --no-sponskrub-force             Not applicable
    --sponskrub-location             Not applicable
    --sponskrub-args                 Not applicable

#### No longer supported
These options may no longer work as intended

    --prefer-avconv                  avconv is not officially supported by yt-dlp (Alias: --no-prefer-ffmpeg)
    --prefer-ffmpeg                  Default (Alias: --no-prefer-avconv)
    -C, --call-home                  Not implemented
    --no-call-home                   Default
    --include-ads                    No longer supported
    --no-include-ads                 Default
    --write-annotations              No supported site has annotations now
    --no-write-annotations           Default
    --compat-options seperate-video-versions  No longer needed
    --compat-options no-youtube-prefer-utc-upload-date  No longer supported

#### Removed
These options were deprecated since 2014 and have now been entirely removed

    -A, --auto-number                -o "%(autonumber)s-%(id)s.%(ext)s"
    -t, -l, --title, --literal      -o "%(title)s-%(id)s.%(ext)s"

# CONTRIBUTING
See [CONTRIBUTING.md](CONTRIBUTING.md#contributing-to-yt-dlp) for instructions on [Opening an Issue](CONTRIBUTING.md#opening-an-issue) and [Contributing code to the project](CONTRIBUTING.md#developer-instructions)

# WIKI
See the [Wiki](https://github.com/yt-dlp/yt-dlp/wiki) for more information
yt-dlp-2024.09.27/bundle/
yt-dlp-2024.09.27/bundle/__init__.py
yt-dlp-2024.09.27/bundle/docker/
yt-dlp-2024.09.27/bundle/docker/compose.yml
services:
  static:
    build: static
    environment:
      channel: ${channel}
      origin: ${origin}
      version: ${version}
    volumes:
      - ~/build:/build
      - ../..:/yt-dlp
yt-dlp-2024.09.27/bundle/docker/static/
yt-dlp-2024.09.27/bundle/docker/static/Dockerfile
FROM alpine:3.19 as base

RUN apk --update add --no-cache \
    build-base \
    python3 \
    pipx \
    ;

RUN pipx install pyinstaller
# Requires above step to prepare the shared venv
RUN ~/.local/share/pipx/shared/bin/python -m pip install -U wheel

RUN apk --update add --no-cache \
    scons \
    patchelf \
    binutils \
    ;

RUN pipx install staticx

WORKDIR /yt-dlp
COPY entrypoint.sh /entrypoint.sh
ENTRYPOINT /entrypoint.sh

yt-dlp-2024.09.27/bundle/docker/static/entrypoint.sh
#!/bin/ash
set -e

source ~/.local/share/pipx/venvs/pyinstaller/bin/activate
python -m devscripts.install_deps --include secretstorage --include curl-cffi
python -m devscripts.make_lazy_extractors
python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}"
python -m bundle.pyinstaller
deactivate

source ~/.local/share/pipx/venvs/staticx/bin/activate
staticx /yt-dlp/dist/yt-dlp_linux /build/yt-dlp_linux
deactivate
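# A usage sketch for the static-build service defined in compose.yml above
# (assumptions: run from bundle/docker/, with the environment variables set
# to whatever channel/origin/version you want baked into the build):
#
#   channel=stable origin=yt-dlp/yt-dlp version=2024.09.27 \
#       docker compose up --build static
#
# Per the volume mounts in compose.yml, the resulting binary lands in ~/build.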
yt-dlp-2024.09.27/bundle/py2exe.py
#!/usr/bin/env python3

# Allow execution from anywhere
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import warnings

from py2exe import freeze

from devscripts.utils import read_version

VERSION = read_version()


def main():
    warnings.warn(
        'py2exe builds do not support pycryptodomex and need VC++14 to run. '
        'It is recommended to run "pyinst.py" to build using pyinstaller instead')

    freeze(
        console=[{
            'script': './yt_dlp/__main__.py',
            'dest_base': 'yt-dlp',
            'icon_resources': [(1, 'devscripts/logo.ico')],
        }],
        version_info={
            'version': VERSION,
            'description': 'A feature-rich command-line audio/video downloader',
            'comments': 'Official repository: <https://github.com/yt-dlp/yt-dlp>',
            'product_name': 'yt-dlp',
            'product_version': VERSION,
        },
        options={
            'bundle_files': 0,
            'compressed': 1,
            'optimize': 2,
            'dist_dir': './dist',
            'excludes': [
                # py2exe cannot import Crypto
                'Crypto',
                'Cryptodome',
                # requests >=2.32.0 breaks py2exe builds due to certifi dependency
                'requests',
                'urllib3',
            ],
            'dll_excludes': ['w9xpopen.exe', 'crypt32.dll'],
            # Modules that are only imported dynamically must be added here
            'includes': ['yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated',
                         'yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated'],
        },
        zipfile=None,
    )


if __name__ == '__main__':
    main()
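# Usage sketch (assumption: run from the repository root so that the
# `bundle` and `devscripts` packages are importable):
#
#   python -m bundle.py2exe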
yt-dlp-2024.09.27/bundle/pyinstaller.py
#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import platform

from PyInstaller.__main__ import run as run_pyinstaller

from devscripts.utils import read_version

OS_NAME, MACHINE, ARCH = sys.platform, platform.machine().lower(), platform.architecture()[0][:2]
if MACHINE in ('x86', 'x86_64', 'amd64', 'i386', 'i686'):
    MACHINE = 'x86' if ARCH == '32' else ''


def main():
    opts, version = parse_options(), read_version()

    onedir = '--onedir' in opts or '-D' in opts
    if not onedir and '-F' not in opts and '--onefile' not in opts:
        opts.append('--onefile')

    name, final_file = exe(onedir)
    print(f'Building yt-dlp v{version} for {OS_NAME} {platform.machine()} with options {opts}')
    print('Remember to update the version using "devscripts/update-version.py"')
    if not os.path.isfile('yt_dlp/extractor/lazy_extractors.py'):
        print('WARNING: Building without lazy_extractors. Run '
              '"devscripts/make_lazy_extractors.py" to build lazy extractors', file=sys.stderr)
    print(f'Destination: {final_file}\n')

    opts = [
        f'--name={name}',
        '--icon=devscripts/logo.ico',
        '--upx-exclude=vcruntime140.dll',
        '--noconfirm',
        '--additional-hooks-dir=yt_dlp/__pyinstaller',
        *opts,
        'yt_dlp/__main__.py',
    ]
    print(f'Running PyInstaller with {opts}')
    run_pyinstaller(opts)
    set_version_info(final_file, version)


def parse_options():
    # Compatibility with older arguments
    opts = sys.argv[1:]
    if opts[0:1] in (['32'], ['64']):
        if ARCH != opts[0]:
            raise Exception(f'{opts[0]}bit executable cannot be built on a {ARCH}bit system')
        opts = opts[1:]
    return opts


def exe(onedir):
    """@returns (name, path)"""
    name = '_'.join(filter(None, (
        'yt-dlp',
        {'win32': '', 'darwin': 'macos'}.get(OS_NAME, OS_NAME),
        MACHINE,
    )))
    return name, ''.join(filter(None, (
        'dist/',
        onedir and f'{name}/',
        name,
        OS_NAME == 'win32' and '.exe',
    )))


def version_to_list(version):
    version_list = version.split('.')
    return list(map(int, version_list)) + [0] * (4 - len(version_list))


def set_version_info(exe, version):
    if OS_NAME == 'win32':
        windows_set_version(exe, version)


def windows_set_version(exe, version):
    from PyInstaller.utils.win32.versioninfo import (
        FixedFileInfo,
        StringFileInfo,
        StringStruct,
        StringTable,
        VarFileInfo,
        VarStruct,
        VSVersionInfo,
    )

    try:
        from PyInstaller.utils.win32.versioninfo import SetVersion
    except ImportError:  # Pyinstaller >= 5.8
        from PyInstaller.utils.win32.versioninfo import write_version_info_to_executable as SetVersion

    version_list = version_to_list(version)
    suffix = MACHINE and f'_{MACHINE}'
    SetVersion(exe, VSVersionInfo(
        ffi=FixedFileInfo(
            filevers=version_list,
            prodvers=version_list,
            mask=0x3F,
            flags=0x0,
            OS=0x4,
            fileType=0x1,
            subtype=0x0,
            date=(0, 0),
        ),
        kids=[
            StringFileInfo([StringTable('040904B0', [
                StringStruct('Comments', f'yt-dlp{suffix} Command Line Interface'),
                StringStruct('CompanyName', 'https://github.com/yt-dlp'),
                StringStruct('FileDescription', 'yt-dlp%s' % (MACHINE and f' ({MACHINE})')),
                StringStruct('FileVersion', version),
                StringStruct('InternalName', f'yt-dlp{suffix}'),
                StringStruct('LegalCopyright', 'pukkandan.ytdlp@gmail.com | UNLICENSE'),
                StringStruct('OriginalFilename', f'yt-dlp{suffix}.exe'),
                StringStruct('ProductName', f'yt-dlp{suffix}'),
                StringStruct(
                    'ProductVersion', f'{version}{suffix} on Python {platform.python_version()}'),
            ])]),
            VarFileInfo([VarStruct('Translation', [0, 1200])]),
        ],
    ))


if __name__ == '__main__':
    main()
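# Usage sketch: the Docker entrypoint above invokes this module as
#   python -m bundle.pyinstaller
# Run it from the repository root; pass `-D`/`--onedir` for a directory
# build instead of the default one-file build.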
yt-dlp-2024.09.27/devscripts/
yt-dlp-2024.09.27/devscripts/__init__.py
yt-dlp-2024.09.27/devscripts/bash-completion.in
__yt_dlp()
{
    local cur prev opts fileopts diropts keywords
    COMPREPLY=()
    cur="${COMP_WORDS[COMP_CWORD]}"
    prev="${COMP_WORDS[COMP_CWORD-1]}"
    opts="{{flags}}"
    keywords=":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"
    fileopts="-a|--batch-file|--download-archive|--cookies|--load-info"
    diropts="--cache-dir"

    if [[ ${prev} =~ ${fileopts} ]]; then
        COMPREPLY=( $(compgen -f -- ${cur}) )
        return 0
    elif [[ ${prev} =~ ${diropts} ]]; then
        COMPREPLY=( $(compgen -d -- ${cur}) )
        return 0
    fi

    if [[ ${cur} =~ : ]]; then
        COMPREPLY=( $(compgen -W "${keywords}" -- ${cur}) )
        return 0
    elif [[ ${cur} == * ]] ; then
        COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
        return 0
    fi
}

complete -F __yt_dlp yt-dlp
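# Usage sketch (assumption): devscripts/bash-completion.py (below) fills the
# {{flags}} placeholder in this template and writes the result to
# completions/bash/yt-dlp; source that file to enable completion:
#
#   source completions/bash/yt-dlp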
yt-dlp-2024.09.27/devscripts/bash-completion.py
#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import yt_dlp

BASH_COMPLETION_FILE = 'completions/bash/yt-dlp'
BASH_COMPLETION_TEMPLATE = 'devscripts/bash-completion.in'


def build_completion(opt_parser):
    opts_flag = []
    for group in opt_parser.option_groups:
        for option in group.option_list:
            # for every long flag
            opts_flag.append(option.get_opt_string())
    with open(BASH_COMPLETION_TEMPLATE) as f:
        template = f.read()
    with open(BASH_COMPLETION_FILE, 'w') as f:
        # just using the special char
        filled_template = template.replace('{{flags}}', ' '.join(opts_flag))
        f.write(filled_template)


parser = yt_dlp.parseOpts(ignore_config_files=True)[0]
build_completion(parser)

yt-dlp-2024.09.27/devscripts/changelog_override.json
[ { "action": "add", "when": "29cb20bd563c02671b31dd840139e93dd37150a1", "short": "[priority] **A new release type has been added!**\n * [`nightly`](https://github.com/yt-dlp/yt-dlp/releases/tag/nightly) builds will be made after each push, containing the latest fixes (but also possibly bugs).\n * When using `--update`/`-U`, a release binary will only update to its current channel (either `stable` or `nightly`).\n * The `--update-to` option has been added allowing the user more control over program upgrades (or downgrades).\n * `--update-to` can change the release channel (`stable`, `nightly`) and also upgrade or downgrade to specific tags.\n * **Usage**: `--update-to CHANNEL`, `--update-to TAG`, `--update-to CHANNEL@TAG`" }, { "action": "add", "when": "5038f6d713303e0967d002216e7a88652401c22a", "short": "[priority] **YouTube throttling fixes!**" }, { "action": "remove", "when": "2e023649ea4e11151545a34dc1360c114981a236" }, { "action": "add", "when": "01aba2519a0884ef17d5f85608dbd2a455577147", "short": "[priority] YouTube: Improved throttling and signature fixes" }, { "action": "change", "when": "c86e433c35fe5da6cb29f3539eef97497f84ed38", "short": "[extractor/niconico:series] Fix extraction (#6898)", "authors": ["sqrtNOT"] }, { "action": "change", "when": "69a40e4a7f6caa5662527ebd2f3c4e8aa02857a2", "short": "[extractor/youtube:music_search_url] Extract title (#7102)", "authors": ["kangalio"] }, { "action": "change", "when": "8417f26b8a819cd7ffcd4e000ca3e45033e670fb", "short": "Add option `--color` (#6904)", "authors": ["Grub4K"] }, { "action": "change", "when": "b4e0d75848e9447cee2cd3646ce54d4744a7ff56", "short": "Improve `--download-sections`\n - Support negative time-ranges\n - Add `*from-url` to obey time-ranges in URL", "authors": ["pukkandan"] }, { "action": "change", "when": "1e75d97db21152acc764b30a688e516f04b8a142", "short": "[extractor/youtube] Add `ios` to default clients used\n - IOS is affected neither by 403 nor by nsig so helps mitigate them preemptively\n - IOS also has higher bit-rate 'premium' formats though they are not labeled as such", "authors": ["pukkandan"] }, { "action": "change", "when": "f2ff0f6f1914b82d4a51681a72cc0828115dcb4a", "short": "[extractor/motherless] Add gallery support, fix groups (#7211)",
"authors": ["rexlambert22", "Ti4eeT4e"] }, { "action": "change", "when": "a4486bfc1dc7057efca9dd3fe70d7fa25c56f700", "short": "[misc] Revert \"Add automatic duplicate issue detection\"", "authors": ["pukkandan"] }, { "action": "add", "when": "1ceb657bdd254ad961489e5060f2ccc7d556b729", "short": "[priority] Security: [[CVE-2023-35934](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-35934)] Fix [Cookie leak](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj)\n - `--add-header Cookie:` is deprecated and auto-scoped to input URL domains\n - Cookies are scoped when passed to external downloaders\n - Add `cookies` field to info.json and deprecate `http_headers.Cookie`" }, { "action": "change", "when": "b03fa7834579a01cc5fba48c0e73488a16683d48", "short": "[ie/twitter] Revert 92315c03774cfabb3a921884326beb4b981f786b", "authors": ["pukkandan"] }, { "action": "change", "when": "fcd6a76adc49d5cd8783985c7ce35384b72e545f", "short": "[test] Add tests for socks proxies (#7908)", "authors": ["coletdjnz"] }, { "action": "change", "when": "4bf912282a34b58b6b35d8f7e6be535770c89c76", "short": "[rh:urllib] Remove dot segments during URL normalization (#7662)", "authors": ["coletdjnz"] }, { "action": "change", "when": "59e92b1f1833440bb2190f847eb735cf0f90bc85", "short": "[rh:urllib] Simplify gzip decoding (#7611)", "authors": ["Grub4K"] }, { "action": "add", "when": "c1d71d0d9f41db5e4306c86af232f5f6220a130b", "short": "[priority] **The minimum *recommended* Python version has been raised to 3.8**\nSince Python 3.7 has reached end-of-life, support for it will be dropped soon. [Read more](https://github.com/yt-dlp/yt-dlp/issues/7803)" }, { "action": "add", "when": "61bdf15fc7400601c3da1aa7a43917310a5bf391", "short": "[priority] Security: [[CVE-2023-40581](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40581)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-42h4-v29r-42qg)\n - The shell escape function is now using `\"\"` instead of `\\\"`.\n - `utils.Popen` has been patched to properly quote commands." }, { "action": "change", "when": "8a8b54523addf46dfd50ef599761a81bc22362e6", "short": "[rh:requests] Add handler for `requests` HTTP library (#3668)\n\n\tAdds support for HTTPS proxies and persistent connections (keep-alive)", "authors": ["bashonly", "coletdjnz", "Grub4K"] }, { "action": "add", "when": "1d03633c5a1621b9f3a756f0a4f9dc61fab3aeaa", "short": "[priority] **The release channels have been adjusted!**\n\t* [`master`](https://github.com/yt-dlp/yt-dlp-master-builds) builds are made after each push, containing the latest fixes (but also possibly bugs). This was previously the `nightly` channel.\n\t* [`nightly`](https://github.com/yt-dlp/yt-dlp-nightly-builds) builds are now made once a day, if there were any changes." 
}, { "action": "add", "when": "f04b5bedad7b281bee9814686bba1762bae092eb", "short": "[priority] Security: [[CVE-2023-46121](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-46121)] Patch [Generic Extractor MITM Vulnerability via Arbitrary Proxy Injection](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3ch3-jhc6-5r8x)\n\t- Disallow smuggling of arbitrary `http_headers`; extractors now only use specific headers" }, { "action": "change", "when": "15f22b4880b6b3f71f350c64d70976ae65b9f1ca", "short": "[webvtt] Allow spaces before newlines for CueBlock (#7681)", "authors": ["TSRBerry"] }, { "action": "change", "when": "4ce57d3b873c2887814cbec03d029533e82f7db5", "short": "[ie] Support multi-period MPD streams (#6654)", "authors": ["alard", "pukkandan"] }, { "action": "change", "when": "aa7e9ae4f48276bd5d0173966c77db9484f65a0a", "short": "[ie/xvideos] Support new URL format (#9502)", "authors": ["sta1us"] }, { "action": "remove", "when": "22e4dfacb61f62dfbb3eb41b31c7b69ba1059b80" }, { "action": "change", "when": "e3a3ed8a981d9395c4859b6ef56cd02bc3148db2", "short": "[cleanup:ie] No `from` stdlib imports in extractors", "authors": ["pukkandan"] }, { "action": "add", "when": "9590cc6b4768e190183d7d071a6c78170889116a", "short": "[priority] Security: [[CVE-2024-22423](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2024-22423)] [Prevent RCE when using `--exec` with `%q` on Windows](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-hjq6-52gw-2g7p)\n - The shell escape function now properly escapes `%`, `\\` and `\\n`.\n - `utils.Popen` has been patched accordingly." }, { "action": "change", "when": "41ba4a808b597a3afed78c89675a30deb6844450", "short": "[ie/tiktok] Extract via mobile API only if extractor-arg is passed (#9938)", "authors": ["bashonly"] }, { "action": "remove", "when": "6e36d17f404556f0e3a43f441c477a71a91877d9" }, { "action": "change", "when": "beaf832c7a9d57833f365ce18f6115b88071b296", "short": "[ie/soundcloud] Add `formats` extractor-arg (#10004)", "authors": ["bashonly", "Grub4K"] }, { "action": "change", "when": "5c019f6328ad40d66561eac3c4de0b3cd070d0f6", "short": "[cleanup] Misc (#9765)", "authors": ["bashonly", "Grub4K", "seproDev"] }, { "action": "change", "when": "e6a22834df1776ec4e486526f6df2bf53cb7e06f", "short": "[ie/orf:on] Add `prefer_segments_playlist` extractor-arg (#10314)", "authors": ["seproDev"] }, { "action": "add", "when": "6aaf96a3d6e7d0d426e97e11a2fcf52fda00e733", "short": "[priority] Security: [[CVE-2024-38519](https://nvd.nist.gov/vuln/detail/CVE-2024-38519)] [Properly sanitize file-extension to prevent file system modification and RCE](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j)\n - Unsafe extensions are now blocked from being downloaded" }, { "action": "add", "when": "6075a029dba70a89675ae1250e7cdfd91f0eba41", "short": "[priority] Security: [[ie/douyutv] Do not use dangerous javascript source/URL](https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-3v33-3wmw-3785)\n - A dependency on potentially malicious third-party JavaScript code has been removed from the Douyu extractors" }, { "action": "add", "when": "fb8b7f226d251e521a89b23c415e249e5b788e5c", "short": "[priority] **The minimum *recommended* Python version has been raised to 3.9**\nSince Python 3.8 will reach end-of-life in October 2024, support for it will be dropped soon. 
[Read more](https://github.com/yt-dlp/yt-dlp/issues/10086)" } ]

yt-dlp-2024.09.27/devscripts/changelog_override.schema.json
{ "$schema": "http://json-schema.org/draft/2020-12/schema", "type": "array", "uniqueItems": true, "items": { "type": "object", "oneOf": [ { "type": "object", "properties": { "action": { "enum": [ "add" ] }, "when": { "type": "string", "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" }, "hash": { "type": "string", "pattern": "^[0-9a-f]{40}$" }, "short": { "type": "string" }, "authors": { "type": "array", "items": { "type": "string" } } }, "required": [ "action", "short" ] }, { "type": "object", "properties": { "action": { "enum": [ "remove" ] }, "when": { "type": "string", "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" }, "hash": { "type": "string", "pattern": "^[0-9a-f]{40}$" } }, "required": [ "action", "hash" ] }, { "type": "object", "properties": { "action": { "enum": [ "change" ] }, "when": { "type": "string", "pattern": "^([0-9a-f]{40}|\\d{4}\\.\\d{2}\\.\\d{2})$" }, "hash": { "type": "string", "pattern": "^[0-9a-f]{40}$" }, "short": { "type": "string" }, "authors": { "type": "array", "items": { "type": "string" } } }, "required": [ "action", "hash", "short", "authors" ] } ] } }

yt-dlp-2024.09.27/devscripts/check-porn.py
#!/usr/bin/env python3
"""
This script employs a VERY basic heuristic ('porn' in webpage.lower()) to check
if we are not 'age_limit' tagging some porn site

A second approach implemented relies on a list of porn domains, to activate it
pass the list filename as the only argument
"""

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import urllib.parse
import urllib.request

from test.helper import gettestcases

if len(sys.argv) > 1:
    METHOD = 'LIST'
    LIST = open(sys.argv[1]).read().decode('utf8').strip()
else:
    METHOD = 'EURISTIC'

for test in gettestcases():
    if METHOD == 'EURISTIC':
        try:
            webpage = urllib.request.urlopen(test['url'], timeout=10).read()
        except Exception:
            print('\nFail: {}'.format(test['name']))
            continue

        webpage = webpage.decode('utf8', 'replace')

        RESULT = 'porn' in webpage.lower()

    elif METHOD == 'LIST':
        domain = urllib.parse.urlparse(test['url']).netloc
        if not domain:
            print('\nFail: {}'.format(test['name']))
            continue
        domain = '.'.join(domain.split('.')[-2:])

        RESULT = ('.' + domain + '\n' in LIST or '\n' + domain + '\n' in LIST)

    if RESULT and ('info_dict' not in test or 'age_limit' not in test['info_dict']
                   or test['info_dict']['age_limit'] != 18):
        print('\nPotential missing age_limit check: {}'.format(test['name']))
    elif not RESULT and ('info_dict' in test and 'age_limit' in test['info_dict']
                         and test['info_dict']['age_limit'] == 18):
        print('\nPotential false negative: {}'.format(test['name']))
    else:
        sys.stdout.write('.')
    sys.stdout.flush()

print()
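# Usage sketch (assumption: run from the repository root, per the docstring):
#   python devscripts/check-porn.py              # heuristic mode
#   python devscripts/check-porn.py domains.txt  # domain-list mode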
yt-dlp-2024.09.27/devscripts/cli_to_api.py
#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import yt_dlp
import yt_dlp.options

create_parser = yt_dlp.options.create_parser


def parse_patched_options(opts):
    patched_parser = create_parser()
    patched_parser.defaults.update({
        'ignoreerrors': False,
        'retries': 0,
        'fragment_retries': 0,
        'extract_flat': False,
        'concat_playlist': 'never',
    })
    yt_dlp.options.create_parser = lambda: patched_parser
    try:
        return yt_dlp.parse_options(opts)
    finally:
        yt_dlp.options.create_parser = create_parser


default_opts = parse_patched_options([]).ydl_opts


def cli_to_api(opts, cli_defaults=False):
    opts = (yt_dlp.parse_options if cli_defaults else parse_patched_options)(opts).ydl_opts

    diff = {k: v for k, v in opts.items() if default_opts[k] != v}
    if 'postprocessors' in diff:
        diff['postprocessors'] = [pp for pp in diff['postprocessors']
                                  if pp not in default_opts['postprocessors']]
    return diff


if __name__ == '__main__':
    from pprint import pprint

    print('\nThe arguments passed translate to:\n')
    pprint(cli_to_api(sys.argv[1:]))
    print('\nCombining these with the CLI defaults gives:\n')
    pprint(cli_to_api(sys.argv[1:], True))
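# Usage sketch: translate CLI flags into the equivalent YoutubeDL API params, e.g.
#   python devscripts/cli_to_api.py -f bv+ba --no-mtime
# prints the dict to pass as `YoutubeDL(params)`.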
yt-dlp-2024.09.27/devscripts/fish-completion.in
{{commands}}

complete --command yt-dlp --arguments ":ytfavorites :ytrecommended :ytsubscriptions :ytwatchlater :ythistory"

yt-dlp-2024.09.27/devscripts/fish-completion.py
#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import optparse

import yt_dlp
from yt_dlp.utils import shell_quote

FISH_COMPLETION_FILE = 'completions/fish/yt-dlp.fish'
FISH_COMPLETION_TEMPLATE = 'devscripts/fish-completion.in'

EXTRA_ARGS = {
    'remux-video': ['--arguments', 'mp4 mkv', '--exclusive'],
    'recode-video': ['--arguments', 'mp4 flv ogg webm mkv', '--exclusive'],

    # Options that need a file parameter
    'download-archive': ['--require-parameter'],
    'cookies': ['--require-parameter'],
    'load-info': ['--require-parameter'],
    'batch-file': ['--require-parameter'],
}


def build_completion(opt_parser):
    commands = []

    for group in opt_parser.option_groups:
        for option in group.option_list:
            long_option = option.get_opt_string().strip('-')
            complete_cmd = ['complete', '--command', 'yt-dlp', '--long-option', long_option]
            if option._short_opts:
                complete_cmd += ['--short-option', option._short_opts[0].strip('-')]
            if option.help != optparse.SUPPRESS_HELP:
                complete_cmd += ['--description', option.help]
            complete_cmd.extend(EXTRA_ARGS.get(long_option, []))
            commands.append(shell_quote(complete_cmd))

    with open(FISH_COMPLETION_TEMPLATE) as f:
        template = f.read()
    filled_template = template.replace('{{commands}}', '\n'.join(commands))

    with open(FISH_COMPLETION_FILE, 'w') as f:
        f.write(filled_template)


parser = yt_dlp.parseOpts(ignore_config_files=True)[0]
build_completion(parser)
yt-dlp-2024.09.27/devscripts/generate_aes_testdata.py
#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import codecs
import subprocess

from yt_dlp.aes import aes_encrypt, key_expansion
from yt_dlp.utils import intlist_to_bytes

secret_msg = b'Secret message goes here'


def hex_str(int_list):
    return codecs.encode(intlist_to_bytes(int_list), 'hex')


def openssl_encode(algo, key, iv):
    cmd = ['openssl', 'enc', '-e', '-' + algo, '-K', hex_str(key), '-iv', hex_str(iv)]
    prog = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    out, _ = prog.communicate(secret_msg)
    return out


iv = key = [0x20, 0x15] + 14 * [0]

r = openssl_encode('aes-128-cbc', key, iv)
print('aes_cbc_decrypt')
print(repr(r))

password = key
new_key = aes_encrypt(password, key_expansion(password))
r = openssl_encode('aes-128-ctr', new_key, iv)
print('aes_decrypt_text 16')
print(repr(r))

password = key + 16 * [0]
new_key = aes_encrypt(password, key_expansion(password)) * (32 // 16)
r = openssl_encode('aes-256-ctr', new_key, iv)
print('aes_decrypt_text 32')
print(repr(r))

yt-dlp-2024.09.27/devscripts/install_deps.py
#!/usr/bin/env python3

# Allow execution from anywhere
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import argparse
import re
import subprocess
from pathlib import Path

from devscripts.tomlparse import parse_toml
from devscripts.utils import read_file


def parse_args():
    parser = argparse.ArgumentParser(description='Install dependencies for yt-dlp')
    parser.add_argument(
        'input', nargs='?', metavar='TOMLFILE',
        default=Path(__file__).parent.parent / 'pyproject.toml',
        help='input file (default: %(default)s)')
    parser.add_argument(
        '-e', '--exclude', metavar='DEPENDENCY', action='append',
        help='exclude a dependency')
    parser.add_argument(
        '-i', '--include', metavar='GROUP', action='append',
        help='include an optional dependency group')
    parser.add_argument(
        '-o', '--only-optional', action='store_true',
        help='only install optional dependencies')
    parser.add_argument(
        '-p', '--print', action='store_true',
        help='only print requirements to stdout')
    parser.add_argument(
        '-u', '--user', action='store_true',
        help='install with pip as --user')
    return parser.parse_args()


def main():
    args = parse_args()
    project_table = parse_toml(read_file(args.input))['project']
    recursive_pattern = re.compile(rf'{project_table["name"]}\[(?P<group_name>[\w-]+)\]')
    optional_groups = project_table['optional-dependencies']
    excludes = args.exclude or []

    def yield_deps(group):
        for dep in group:
            if mobj := recursive_pattern.fullmatch(dep):
                yield from optional_groups.get(mobj.group('group_name'), [])
            else:
                yield dep

    targets = []
    if not args.only_optional:  # `-o` should exclude 'dependencies' and the 'default' group
        targets.extend(project_table['dependencies'])
        if 'default' not in excludes:  # `--exclude default` should exclude entire 'default' group
            targets.extend(yield_deps(optional_groups['default']))

    for include in filter(None, map(optional_groups.get, args.include or [])):
        targets.extend(yield_deps(include))

    targets = [t for t in targets if re.match(r'[\w-]+', t).group(0).lower() not in excludes]

    if args.print:
        for target in targets:
            print(target)
        return

    pip_args = [sys.executable, '-m', 'pip', 'install', '-U']
    if args.user:
        pip_args.append('--user')
    pip_args.extend(targets)
    return subprocess.call(pip_args)


if __name__ == '__main__':
    sys.exit(main())
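# Usage sketch (mirrors the Docker entrypoint earlier in this archive):
#   python -m devscripts.install_deps --include secretstorage --include curl-cffi
#   python -m devscripts.install_deps --print    # list requirements without installing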
yt-dlp-2024.09.27/devscripts/lazy_load_template.py
import importlib
import random
import re

from ..utils import (
    age_restricted,
    bug_reports_message,
    classproperty,
    variadic,
    write_string,
)

# These bloat the lazy_extractors, so allow them to passthrough silently
ALLOWED_CLASSMETHODS = {'extract_from_webpage', 'get_testcases', 'get_webpage_testcases'}
_WARNED = False


class LazyLoadMetaClass(type):
    def __getattr__(cls, name):
        global _WARNED
        if ('_real_class' not in cls.__dict__
                and name not in ALLOWED_CLASSMETHODS and not _WARNED):
            _WARNED = True
            write_string('WARNING: Falling back to normal extractor since lazy extractor '
                         f'{cls.__name__} does not have attribute {name}{bug_reports_message()}\n')
        return getattr(cls.real_class, name)


class LazyLoadExtractor(metaclass=LazyLoadMetaClass):
    @classproperty
    def real_class(cls):
        if '_real_class' not in cls.__dict__:
            cls._real_class = getattr(importlib.import_module(cls._module), cls.__name__)
        return cls._real_class

    def __new__(cls, *args, **kwargs):
        instance = cls.real_class.__new__(cls.real_class)
        instance.__init__(*args, **kwargs)
        return instance

yt-dlp-2024.09.27/devscripts/logo.ico
(binary Windows icon data omitted)
(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((()))(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((()))((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((()))&&&5((()))(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((***(((����***0))))))((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((())))))%%%(���0���`���� 
���������������������'''u((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((()))((())))))((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((()))(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((&))$))%(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((('''.''/'',(('(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((()))(((((((((((((((((((((((((((((((((((((((++ ))"(('(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((&&&MMMYYYYYYYYYYYYYYYOOO'''((((('(('(((((((((((((((((((((((((((++n��&&2))#(('((((((((((((((((((((((((((((((((((((((((((((((((((((((((('''""$((********++))#))#++++++++,,""H����##F,,**++))%(()(((((((((((((((((((((((((((((((((((()))$$$&&&((($$$mmmqqq((T p !!Oc ""M��q ((())#((((((((((((((((((((((((((((((((((((###===xxx###)))))&��������������������""L ��������**(('(((((((((((((((((((((((((((((((((OOO""")))(((((((((((())!$$>��%%;��''/""I��!!O** ��""I����""G**((((((((((((((((((((((((((((((((($$$<<<,,,)))((((((((())"%%7��^00���**-- n��** ,,�� ,,))$��t++((((((((((((((((((((((((((((((((()))!!!MMMGGG (((((((((((&))$��..f��&&5++ S��##C,,l��''1++ ��++(((((((((((((((((((((((((((((((((((()))!!! 
FFFnnn((((((((((((++ ��..##C����!!M++%%9��a,,!!N����##D..�� ++((((((((((((((((((((((((((((((((()))555...%%%((((((((((((++��%%:X����l++))%��,,&&4��,,�� **(('((((((((((((((((((((((((((((((yyyOOO$$$)))(((((((((((()) $$@����++++ ��,,))"��������++(((((((((((((((((((((((((((((((((~~~((((((((((((((((((((((()**[ ��++++�� **** ##E)) ((((((((((((((((((((((((((((((((('''(((777%%%)))(((((((((((((((((((((((((()**))#''0-- �� ))",,l��'',))$((&((())#**''0))&)) (()((((((((((((((((((((((((((((((((((((&&&"""(((((((((((((((((((((((((((((((((((((((((&))$++j��(()����##C)) ((&((%((&(('))$((&(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((**""I����$$>������b++((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((('((*""I""J''/$$<""J""J""J&&5))%(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((('****((&))"******))%(()((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((()))(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((()))((('''((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((����''')))((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((()))(((r(��� ���@���� 
���������������������'''((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((''')))(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((()**++((&(()((((((((((((((((((((((((((((((((((((((())))))((((((((((((((((((((()) \''/((%(((((((((((((((((((((((((((((((((((((((###^^^xxxxxxxxx``a$$"********))"++++..��""J--**((((((((((((((((((((((((((())) i]\ R""Fmhdk��ug!!Q**((((((((((((((((((((('''$$$wwwIII !!! %%z���� ����t����^**((((((((((((((((((&&&###"""''''''**��&&4&&2!!N��++ ''. ++(((((((((((((((((((((&&&[[[...$$$(((++��''0g��""H))".. .. ))&((&(((((((((((((((((('''MMM000$$$(((**[��i R��m,, .. ��((&��&&6))#(((((((((((((((&&&###&&&###(((((())%(()����..++z����((*))$((((((((((((((('''$$$UUU$$$)))(((((((())) %%9h //��%%9%%8h T)) ((((((((((((((((((((())))))((((((((((((((((()))"++-- ""L��c++++**++**(()(((((((((((((((((((((((((((((((((((((((((((((((((((((((((**sW W**(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((()**++))#++++))#(()(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((()))((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((('''(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((%%%)''')))(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((***'''(������0���� ���������������������'''((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((()))((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((('**((%(()((((((((((((((((((((((((((())))))((((((((((((((&''-X''0((&((((((((((((((((((((((((%%%!!!dddffh##++++**++,,** _.. 
yt-dlp-2024.09.27/devscripts/make_changelog.py

from __future__ import annotations

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import enum
import itertools
import json
import logging
import re
from collections import defaultdict
from dataclasses import dataclass
from functools import lru_cache
from pathlib import Path

from devscripts.utils import read_file, run_process, write_file

BASE_URL = 'https://github.com'
LOCATION_PATH = Path(__file__).parent
HASH_LENGTH = 7

logger = logging.getLogger(__name__)


class CommitGroup(enum.Enum):
    PRIORITY = 'Important'
    CORE = 'Core'
    EXTRACTOR = 'Extractor'
    DOWNLOADER = 'Downloader'
    POSTPROCESSOR = 'Postprocessor'
    NETWORKING = 'Networking'
    MISC = 'Misc.'
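
    # Illustrative note (editorial addition, not from the original file):
    # a commit prefix resolves via the lookup tables defined below, e.g.
    # CommitGroup.get('ie/youtube') -> (CommitGroup.EXTRACTOR, 'youtube'),
    # while a bare 'cleanup' misses group_lookup() and falls through to
    # subgroup_lookup(), yielding (CommitGroup.MISC, 'cleanup').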

    @classmethod
    @lru_cache
    def subgroup_lookup(cls):
        return {
            name: group
            for group, names in {
                cls.MISC: {
                    'build',
                    'ci',
                    'cleanup',
                    'devscripts',
                    'docs',
                    'test',
                },
                cls.NETWORKING: {
                    'rh',
                },
            }.items()
            for name in names
        }

    @classmethod
    @lru_cache
    def group_lookup(cls):
        result = {
            'fd': cls.DOWNLOADER,
            'ie': cls.EXTRACTOR,
            'pp': cls.POSTPROCESSOR,
            'upstream': cls.CORE,
        }
        result.update({item.name.lower(): item for item in iter(cls)})
        return result

    @classmethod
    def get(cls, value: str) -> tuple[CommitGroup | None, str | None]:
        group, _, subgroup = (group.strip().lower() for group in value.partition('/'))

        result = cls.group_lookup().get(group)
        if not result:
            if subgroup:
                return None, value
            subgroup = group
            result = cls.subgroup_lookup().get(subgroup)

        return result, subgroup or None


@dataclass
class Commit:
    hash: str | None
    short: str
    authors: list[str]

    def __str__(self):
        result = f'{self.short!r}'

        if self.hash:
            result += f' ({self.hash[:HASH_LENGTH]})'

        if self.authors:
            authors = ', '.join(self.authors)
            result += f' by {authors}'

        return result


@dataclass
class CommitInfo:
    details: str | None
    sub_details: tuple[str, ...]
    message: str
    issues: list[str]
    commit: Commit
    fixes: list[Commit]

    def key(self):
        return ((self.details or '').lower(), self.sub_details, self.message)


def unique(items):
    return sorted({item.strip().lower(): item for item in items if item}.values())


class Changelog:
    MISC_RE = re.compile(r'(?:^|\b)(?:lint(?:ing)?|misc|format(?:ting)?|fixes)(?:\b|$)', re.IGNORECASE)
    ALWAYS_SHOWN = (CommitGroup.PRIORITY,)

    def __init__(self, groups, repo, collapsible=False):
        self._groups = groups
        self._repo = repo
        self._collapsible = collapsible

    def __str__(self):
        return '\n'.join(self._format_groups(self._groups)).replace('\t', '    ')

    def _format_groups(self, groups):
        first = True
        for item in CommitGroup:
            if self._collapsible and item not in self.ALWAYS_SHOWN and first:
                first = False
                yield '\n<details><summary><h3>Changelog</h3></summary>\n'

            group = groups[item]
            if group:
                yield self.format_module(item.value, group)

        if self._collapsible:
            yield '\n</details>'

    def format_module(self, name, group):
        result = f'\n#### {name} changes\n' if name else '\n'
        return result + '\n'.join(self._format_group(group))

    def _format_group(self, group):
        sorted_group = sorted(group, key=CommitInfo.key)
        detail_groups = itertools.groupby(sorted_group, lambda item: (item.details or '').lower())
        for _, items in detail_groups:
            items = list(items)
            details = items[0].details

            if details == 'cleanup':
                items = self._prepare_cleanup_misc_items(items)

            prefix = '-'
            if details:
                if len(items) == 1:
                    prefix = f'- **{details}**:'
                else:
                    yield f'- **{details}**'
                    prefix = '\t-'

            sub_detail_groups = itertools.groupby(items, lambda item: tuple(map(str.lower, item.sub_details)))
            for sub_details, entries in sub_detail_groups:
                if not sub_details:
                    for entry in entries:
                        yield f'{prefix} {self.format_single_change(entry)}'
                    continue

                entries = list(entries)
                sub_prefix = f'{prefix} {", ".join(entries[0].sub_details)}'
                if len(entries) == 1:
                    yield f'{sub_prefix}: {self.format_single_change(entries[0])}'
                    continue

                yield sub_prefix
                for entry in entries:
                    yield f'\t{prefix} {self.format_single_change(entry)}'

    def _prepare_cleanup_misc_items(self, items):
        cleanup_misc_items = defaultdict(list)
        sorted_items = []
        for item in items:
            if self.MISC_RE.search(item.message):
                cleanup_misc_items[tuple(item.commit.authors)].append(item)
            else:
                sorted_items.append(item)

        for commit_infos in cleanup_misc_items.values():
            sorted_items.append(CommitInfo(
                'cleanup', ('Miscellaneous',),
                ', '.join(
                    self._format_message_link(None, info.commit.hash)
                    for info in sorted(commit_infos, key=lambda item: item.commit.hash or '')),
                [], Commit(None, '', commit_infos[0].commit.authors), []))

        return sorted_items

    def format_single_change(self, info: CommitInfo):
        message, sep, rest = info.message.partition('\n')
        if '[' not in message:
            # If the message doesn't already contain markdown links, try to add a link to the commit
            message = self._format_message_link(message, info.commit.hash)

        if info.issues:
            message = f'{message} ({self._format_issues(info.issues)})'

        if info.commit.authors:
            message = f'{message} by {self._format_authors(info.commit.authors)}'

        if info.fixes:
            fix_message = ', '.join(f'{self._format_message_link(None, fix.hash)}' for fix in info.fixes)

            authors = sorted({author for fix in info.fixes for author in fix.authors}, key=str.casefold)
            if authors != info.commit.authors:
                fix_message = f'{fix_message} by {self._format_authors(authors)}'

            message = f'{message} (With fixes in {fix_message})'

        return message if not sep else f'{message}{sep}{rest}'

    def _format_message_link(self, message, commit_hash):
        assert message or commit_hash, 'Improperly defined commit message or override'
        message = message if message else commit_hash[:HASH_LENGTH]
        return f'[{message}]({self.repo_url}/commit/{commit_hash})' if commit_hash else message

    def _format_issues(self, issues):
        return ', '.join(f'[#{issue}]({self.repo_url}/issues/{issue})' for issue in issues)

    @staticmethod
    def _format_authors(authors):
        return ', '.join(f'[{author}]({BASE_URL}/{author})' for author in authors)

    @property
    def repo_url(self):
        return f'{BASE_URL}/{self._repo}'


class CommitRange:
    COMMAND = 'git'
    COMMIT_SEPARATOR = '-----'

    AUTHOR_INDICATOR_RE = re.compile(r'Authored by:? ', re.IGNORECASE)
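    # Illustrative note (editorial addition, not from the original file):
    # MESSAGE_RE below splits a short commit message into its parts, e.g.
    #   '[ie/youtube] tab: Fix playlists (#1234)'
    # yields prefix='ie/youtube', sub_details='tab', message='Fix playlists'
    # and issues='#1234'.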
    MESSAGE_RE = re.compile(r'''
        (?:\[(?P<prefix>[^\]]+)\]\ )?
        (?:(?P<sub_details>`?[\w.-]+`?): )?
        (?P<message>.+?)
        (?:\ \((?P<issues>\#\d+(?:,\ \#\d+)*)\))?
        ''', re.VERBOSE | re.DOTALL)
    EXTRACTOR_INDICATOR_RE = re.compile(r'(?:Fix|Add)\s+Extractors?', re.IGNORECASE)
    REVERT_RE = re.compile(r'(?:\[[^\]]+\]\s+)?(?i:Revert)\s+([\da-f]{40})')
    FIXES_RE = re.compile(r'(?i:Fix(?:es)?(?:\s+bugs?)?(?:\s+in|\s+for)?|Revert|Improve)\s+([\da-f]{40})')
    UPSTREAM_MERGE_RE = re.compile(r'Update to ytdl-commit-([\da-f]+)')

    def __init__(self, start, end, default_author=None):
        self._start, self._end = start, end
        self._commits, self._fixes = self._get_commits_and_fixes(default_author)
        self._commits_added = []

    def __iter__(self):
        return iter(itertools.chain(self._commits.values(), self._commits_added))

    def __len__(self):
        return len(self._commits) + len(self._commits_added)

    def __contains__(self, commit):
        if isinstance(commit, Commit):
            if not commit.hash:
                return False
            commit = commit.hash

        return commit in self._commits

    def _get_commits_and_fixes(self, default_author):
        result = run_process(
            self.COMMAND, 'log', f'--format=%H%n%s%n%b%n{self.COMMIT_SEPARATOR}',
            f'{self._start}..{self._end}' if self._start else self._end).stdout

        commits, reverts = {}, {}
        fixes = defaultdict(list)
        lines = iter(result.splitlines(False))
        for i, commit_hash in enumerate(lines):
            short = next(lines)
            skip = short.startswith('Release ') or short == '[version] update'

            authors = [default_author] if default_author else []
            for line in iter(lambda: next(lines), self.COMMIT_SEPARATOR):
                match = self.AUTHOR_INDICATOR_RE.match(line)
                if match:
                    authors = sorted(map(str.strip, line[match.end():].split(',')), key=str.casefold)

            commit = Commit(commit_hash, short, authors)
            if skip and (self._start or not i):
                logger.debug(f'Skipped commit: {commit}')
                continue
            elif skip:
                logger.debug(f'Reached Release commit, breaking: {commit}')
                break

            revert_match = self.REVERT_RE.fullmatch(commit.short)
            if revert_match:
                reverts[revert_match.group(1)] = commit
                continue

            fix_match = self.FIXES_RE.search(commit.short)
            if fix_match:
                commitish = fix_match.group(1)
                fixes[commitish].append(commit)

            commits[commit.hash] = commit

        for commitish, revert_commit in reverts.items():
            reverted = commits.pop(commitish, None)
            if reverted:
                logger.debug(f'{commitish} fully reverted {reverted}')
            else:
                commits[revert_commit.hash] = revert_commit

        for commitish, fix_commits in fixes.items():
            if commitish in commits:
                hashes = ', '.join(commit.hash[:HASH_LENGTH] for commit in fix_commits)
                logger.info(f'Found fix(es) for {commitish[:HASH_LENGTH]}: {hashes}')
                for fix_commit in fix_commits:
                    del commits[fix_commit.hash]
            else:
                logger.debug(f'Commit with fixes not in changes: {commitish[:HASH_LENGTH]}')

        return commits, fixes

    def apply_overrides(self, overrides):
        for override in overrides:
            when = override.get('when')
            if when and when not in self and when != self._start:
                logger.debug(f'Ignored {when!r} override')
                continue

            override_hash = override.get('hash') or when
            if override['action'] == 'add':
                commit = Commit(override.get('hash'), override['short'], override.get('authors') or [])
                logger.info(f'ADD    {commit}')
                self._commits_added.append(commit)

            elif override['action'] == 'remove':
                if override_hash in self._commits:
                    logger.info(f'REMOVE {self._commits[override_hash]}')
                    del self._commits[override_hash]

            elif override['action'] == 'change':
                if override_hash not in self._commits:
                    continue
                commit = Commit(override_hash, override['short'], override.get('authors') or [])
                logger.info(f'CHANGE {self._commits[commit.hash]} -> {commit}')
                self._commits[commit.hash] = commit

        self._commits = dict(reversed(self._commits.items()))
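
    # Illustrative note (editorial addition, not from the original file):
    # entries consumed by apply_overrides() above look roughly like
    #     {"action": "change", "when": "<full commit hash>",
    #      "short": "New commit message", "authors": ["<author>"]}
    # with the "add" and "remove" actions using the same keys.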

    def groups(self):
        group_dict = defaultdict(list)
        for commit in self:
            upstream_re = self.UPSTREAM_MERGE_RE.search(commit.short)
            if upstream_re:
                commit.short = f'[upstream] Merged with youtube-dl {upstream_re.group(1)}'

            match = self.MESSAGE_RE.fullmatch(commit.short)
            if not match:
                logger.error(f'Error parsing short commit message: {commit.short!r}')
                continue

            prefix, sub_details_alt, message, issues = match.groups()
            issues = [issue.strip()[1:] for issue in issues.split(',')] if issues else []

            if prefix:
                groups, details, sub_details = zip(*map(self.details_from_prefix, prefix.split(',')))
                group = next(iter(filter(None, groups)), None)
                details = ', '.join(unique(details))
                sub_details = list(itertools.chain.from_iterable(sub_details))
            else:
                group = CommitGroup.CORE
                details = None
                sub_details = []

            if sub_details_alt:
                sub_details.append(sub_details_alt)
            sub_details = tuple(unique(sub_details))

            if not group:
                if self.EXTRACTOR_INDICATOR_RE.search(commit.short):
                    group = CommitGroup.EXTRACTOR
                    logger.error(f'Assuming [ie] group for {commit.short!r}')
                else:
                    group = CommitGroup.CORE

            commit_info = CommitInfo(
                details, sub_details, message.strip(),
                issues, commit, self._fixes[commit.hash])

            logger.debug(f'Resolved {commit.short!r} to {commit_info!r}')
            group_dict[group].append(commit_info)

        return group_dict

    @staticmethod
    def details_from_prefix(prefix):
        if not prefix:
            return CommitGroup.CORE, None, ()

        prefix, *sub_details = prefix.split(':')

        group, details = CommitGroup.get(prefix)
        if group is CommitGroup.PRIORITY and details:
            details = details.partition('/')[2].strip()

        if details and '/' in details:
            logger.error(f'Prefix is overnested, using first part: {prefix}')
            details = details.partition('/')[0].strip()

        if details == 'common':
            details = None
        elif group is CommitGroup.NETWORKING and details == 'rh':
            details = 'Request Handler'

        return group, details, sub_details


def get_new_contributors(contributors_path, commits):
    contributors = set()
    if contributors_path.exists():
        for line in read_file(contributors_path).splitlines():
            author, _, _ = line.strip().partition(' (')
            authors = author.split('/')
            contributors.update(map(str.casefold, authors))

    new_contributors = set()
    for commit in commits:
        for author in commit.authors:
            author_folded = author.casefold()
            if author_folded not in contributors:
                contributors.add(author_folded)
                new_contributors.add(author)

    return sorted(new_contributors, key=str.casefold)


def create_changelog(args):
    logging.basicConfig(
        datefmt='%Y-%m-%d %H-%M-%S', format='{asctime} | {levelname:<8} | {message}',
        level=logging.WARNING - 10 * args.verbosity, style='{', stream=sys.stderr)

    commits = CommitRange(None, args.commitish, args.default_author)

    if not args.no_override:
        if args.override_path.exists():
            overrides = json.loads(read_file(args.override_path))
            commits.apply_overrides(overrides)
        else:
            logger.warning(f'File {args.override_path.as_posix()} does not exist')

    logger.info(f'Loaded {len(commits)} commits')

    new_contributors = get_new_contributors(args.contributors_path, commits)
    if new_contributors:
        if args.contributors:
            write_file(args.contributors_path, '\n'.join(new_contributors) + '\n', mode='a')
        logger.info(f'New contributors: {", ".join(new_contributors)}')

    return Changelog(commits.groups(), args.repo, args.collapsible)


def create_parser():
    import argparse

    parser = argparse.ArgumentParser(
        description='Create a changelog markdown from a git commit range')
    parser.add_argument(
        'commitish', default='HEAD', nargs='?',
        help='The commitish to create the range from (default: %(default)s)')
    parser.add_argument(
        '-v', '--verbosity', action='count', default=0,
        help='increase verbosity (can be used twice)')
    parser.add_argument(
        '-c', '--contributors', action='store_true',
        help='update CONTRIBUTORS file (default: %(default)s)')
    parser.add_argument(
        '--contributors-path', type=Path, default=LOCATION_PATH.parent / 'CONTRIBUTORS',
        help='path to the CONTRIBUTORS file')
    parser.add_argument(
        '--no-override', action='store_true',
        help='skip override json in commit generation (default: %(default)s)')
    parser.add_argument(
        '--override-path', type=Path, default=LOCATION_PATH / 'changelog_override.json',
        help='path to the changelog_override.json file')
    parser.add_argument(
        '--default-author', default='pukkandan',
        help='the author to use without an author indicator (default: %(default)s)')
    parser.add_argument(
        '--repo', default='yt-dlp/yt-dlp',
        help='the github repository to use for the operations (default: %(default)s)')
    parser.add_argument(
        '--collapsible', action='store_true',
        help='make changelog collapsible (default: %(default)s)')

    return parser


if __name__ == '__main__':
    print(create_changelog(create_parser().parse_args()))


yt-dlp-2024.09.27/devscripts/make_contributing.py

#!/usr/bin/env python3
import optparse
import re


def main():
    return  # This is unused in yt-dlp

    parser = optparse.OptionParser(usage='%prog INFILE OUTFILE')
    options, args = parser.parse_args()
    if len(args) != 2:
        parser.error('Expected an input and an output filename')
    infile, outfile = args

    with open(infile, encoding='utf-8') as inf:
        readme = inf.read()

    bug_text = re.search(
        r'(?s)#\s*BUGS\s*[^\n]*\s*(.*?)#\s*COPYRIGHT', readme).group(1)
    dev_text = re.search(
        r'(?s)(#\s*DEVELOPER INSTRUCTIONS.*?)#\s*EMBEDDING yt-dlp', readme).group(1)

    out = bug_text + dev_text

    with open(outfile, 'w', encoding='utf-8') as outf:
        outf.write(out)


if __name__ == '__main__':
    main()


yt-dlp-2024.09.27/devscripts/make_issue_template.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import re

from devscripts.utils import get_filename_args, read_file, write_file

VERBOSE_TMPL = '''
  - type: checkboxes
    id: verbose
    attributes:
      label: Provide verbose output that clearly demonstrates the problem
      options:
        - label: Run **your** yt-dlp command with **-vU** flag added (`yt-dlp -vU <your command line>`)
          required: true
        - label: "If using API, add `'verbose': True` to `YoutubeDL` params instead"
          required: false
        - label: Copy the WHOLE output (starting with `[debug] Command-line config`) and insert it below
          required: true
  - type: textarea
    id: log
    attributes:
      label: Complete Verbose Output
      description: |
        It should start like this:
      placeholder: |
        [debug] Command-line config: ['-vU', 'https://www.youtube.com/watch?v=BaW_jenozKc']
        [debug] Encodings: locale cp65001, fs utf-8, pref cp65001, out utf-8, error utf-8, screen utf-8
        [debug] yt-dlp version nightly@... from yt-dlp/yt-dlp [b634ba742] (win_exe)
        [debug] Python 3.8.10 (CPython 64bit) - Windows-10-10.0.22000-SP0
        [debug] exe versions: ffmpeg N-106550-g072101bd52-20220410 (fdk,setts), ffprobe N-106624-g391ce570c8-20220415, phantomjs 2.1.1
        [debug] Optional libraries: Cryptodome-3.15.0, brotli-1.0.9, certifi-2022.06.15, mutagen-1.45.1, sqlite3-2.6.0, websockets-10.3
        [debug] Proxy map: {}
        [debug] Request Handlers: urllib, requests
        [debug] Loaded 1893 extractors
        [debug] Fetching release info: https://api.github.com/repos/yt-dlp/yt-dlp-nightly-builds/releases/latest
        yt-dlp is up to date (nightly@... from yt-dlp/yt-dlp-nightly-builds)
        [youtube] Extracting URL: https://www.youtube.com/watch?v=BaW_jenozKc
        <more lines>
      render: shell
    validations:
      required: true
  - type: markdown
    attributes:
      value: |
        > [!CAUTION]
        > ### GitHub is experiencing a high volume of malicious spam comments.
        > ### If you receive any replies asking you download a file, do NOT follow the download links!
        >
        > Note that this issue may be temporarily locked as an anti-spam measure after it is opened.
'''.strip()

NO_SKIP = '''
  - type: checkboxes
    attributes:
      label: DO NOT REMOVE OR SKIP THE ISSUE TEMPLATE
      description: Fill all fields even if you think it is irrelevant for the issue
      options:
        - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\\* field
          required: true
'''.strip()


def main():
    fields = {'no_skip': NO_SKIP}
    fields['verbose'] = VERBOSE_TMPL % fields
    fields['verbose_optional'] = re.sub(r'(\n\s+validations:)?\n\s+required: true', '', fields['verbose'])

    infile, outfile = get_filename_args(has_infile=True)
    write_file(outfile, read_file(infile) % fields)


if __name__ == '__main__':
    main()


yt-dlp-2024.09.27/devscripts/make_lazy_extractors.py

#!/usr/bin/env python3

# Allow direct execution
import os
import shutil
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from inspect import getsource

from devscripts.utils import get_filename_args, read_file, write_file
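
# Editorial sketch (added commentary, not from the original file): the
# generated yt_dlp/extractor/lazy_extractors.py consists of stubs built from
# IE_TEMPLATE plus whatever extra_ie_code() yields, shaped roughly like
#
#     class YoutubeIE(LazyLoadExtractor):
#         _module = 'yt_dlp.extractor.youtube'
#         _VALID_URL = ...
#
# with the attribute values taken from the concrete extractor classes at
# build time.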

NO_ATTR = object()
STATIC_CLASS_PROPERTIES = [
    'IE_NAME', '_ENABLED', '_VALID_URL',  # Used for URL matching
    '_WORKING', 'IE_DESC', '_NETRC_MACHINE', 'SEARCH_KEY',  # Used for --extractor-descriptions
    'age_limit',  # Used for --age-limit (evaluated)
    '_RETURN_TYPE',  # Accessed in CLI only with instance (evaluated)
]
CLASS_METHODS = [
    'ie_key', 'suitable', '_match_valid_url',  # Used for URL matching
    'working', 'get_temp_id', '_match_id',  # Accessed just before instance creation
    'description',  # Used for --extractor-descriptions
    'is_suitable',  # Used for --age-limit
    'supports_login', 'is_single_video',  # Accessed in CLI only with instance
]
IE_TEMPLATE = '''
class {name}({bases}):
    _module = {module!r}
'''
MODULE_TEMPLATE = read_file('devscripts/lazy_load_template.py')


def main():
    lazy_extractors_filename = get_filename_args(default_outfile='yt_dlp/extractor/lazy_extractors.py')
    if os.path.exists(lazy_extractors_filename):
        os.remove(lazy_extractors_filename)

    _ALL_CLASSES = get_all_ies()  # Must be before import

    import yt_dlp.plugins
    from yt_dlp.extractor.common import InfoExtractor, SearchInfoExtractor

    # Filter out plugins
    _ALL_CLASSES = [cls for cls in _ALL_CLASSES if not cls.__module__.startswith(f'{yt_dlp.plugins.PACKAGE_NAME}.')]

    DummyInfoExtractor = type('InfoExtractor', (InfoExtractor,), {'IE_NAME': NO_ATTR})
    module_src = '\n'.join((
        MODULE_TEMPLATE,
        '    _module = None',
        *extra_ie_code(DummyInfoExtractor),
        '\nclass LazyLoadSearchExtractor(LazyLoadExtractor):\n    pass\n',
        *build_ies(_ALL_CLASSES, (InfoExtractor, SearchInfoExtractor), DummyInfoExtractor),
    ))

    write_file(lazy_extractors_filename, f'{module_src}\n')


def get_all_ies():
    PLUGINS_DIRNAME = 'ytdlp_plugins'
    BLOCKED_DIRNAME = f'{PLUGINS_DIRNAME}_blocked'
    if os.path.exists(PLUGINS_DIRNAME):
        # os.rename cannot be used, e.g. in Docker. See https://github.com/yt-dlp/yt-dlp/pull/4958
        shutil.move(PLUGINS_DIRNAME, BLOCKED_DIRNAME)
    try:
        from yt_dlp.extractor.extractors import _ALL_CLASSES
    finally:
        if os.path.exists(BLOCKED_DIRNAME):
            shutil.move(BLOCKED_DIRNAME, PLUGINS_DIRNAME)

    return _ALL_CLASSES


def extra_ie_code(ie, base=None):
    for var in STATIC_CLASS_PROPERTIES:
        val = getattr(ie, var)
        if val != (getattr(base, var) if base else NO_ATTR):
            yield f'    {var} = {val!r}'
    yield ''

    for name in CLASS_METHODS:
        f = getattr(ie, name)
        if not base or f.__func__ != getattr(base, name).__func__:
            yield getsource(f)


def build_ies(ies, bases, attr_base):
    names = []
    for ie in sort_ies(ies, bases):
        yield build_lazy_ie(ie, ie.__name__, attr_base)
        if ie in ies:
            names.append(ie.__name__)

    yield f'\n_ALL_CLASSES = [{", ".join(names)}]'


def sort_ies(ies, ignored_bases):
    """find the correct sorting and add the required base classes so that subclasses can be correctly created"""
    classes, returned_classes = ies[:-1], set()
    assert ies[-1].__name__ == 'GenericIE', 'Last IE must be GenericIE'
    while classes:
        for c in classes[:]:
            bases = set(c.__bases__) - {object, *ignored_bases}
            restart = False
            for b in sorted(bases, key=lambda x: x.__name__):
                if b not in classes and b not in returned_classes:
                    assert b.__name__ != 'GenericIE', 'Cannot inherit from GenericIE'
                    classes.insert(0, b)
                    restart = True
            if restart:
                break
            if bases <= returned_classes:
                yield c
                returned_classes.add(c)
                classes.remove(c)
                break
    yield ies[-1]


def build_lazy_ie(ie, name, attr_base):
    bases = ', '.join({
        'InfoExtractor': 'LazyLoadExtractor',
        'SearchInfoExtractor': 'LazyLoadSearchExtractor',
    }.get(base.__name__, base.__name__) for base in ie.__bases__)

    s = IE_TEMPLATE.format(name=name, module=ie.__module__, bases=bases)
    return s + '\n'.join(extra_ie_code(ie, attr_base))


if __name__ == '__main__':
    main()


yt-dlp-2024.09.27/devscripts/make_readme.py

#!/usr/bin/env python3

"""
yt-dlp --help | make_readme.py

This must be run in a console of correct width
"""

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import functools
import re

from devscripts.utils import read_file, write_file

README_FILE = 'README.md'

OPTIONS_START = 'General Options:'
OPTIONS_END = 'CONFIGURATION'
EPILOG_START = 'See full documentation'
ALLOWED_OVERSHOOT = 2

DISABLE_PATCH = object()


def take_section(text, start=None, end=None, *, shift=0):
    return text[
        text.index(start) + shift if start else None:
        text.index(end) + shift if end else None
    ]


def apply_patch(text, patch):
    return text if patch[0] is DISABLE_PATCH else re.sub(*patch, text)
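
# Editorial note (added commentary, not from the original file): each entry in
# PATCHES below is a (pattern, repl) pair consumed by apply_patch() via
#
#     options = functools.reduce(apply_patch, PATCHES, options)
#
# as done in the write_file() call at the end of this script; an entry whose
# first element is the DISABLE_PATCH sentinel is skipped but kept in place
# for documentation purposes.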

options = take_section(sys.stdin.read(), f'\n  {OPTIONS_START}', f'\n{EPILOG_START}', shift=1)

max_width = max(map(len, options.split('\n')))
switch_col_width = len(re.search(r'(?m)^\s{5,}', options).group())
delim = f'\n{" " * switch_col_width}'

PATCHES = (
    (  # Standardize `--update` message
        r'(?m)^(    -U, --update\s+).+(\n \s.+)*$',
        r'\1Update this program to the latest version',
    ),
    (  # Headings
        r'(?m)^  (\w.+\n)(    (?=\w))?',
        r'## \1',
    ),
    (  # Fixup `--date` formatting
        rf'(?m)( --date DATE.+({delim}[^\[]+)*)\[.+({delim}.+)*$',
        (rf'\1[now|today|yesterday][-N[day|week|month|year]].{delim}'
         f'E.g. "--date today-2weeks" downloads only{delim}'
         'videos uploaded on the same day two weeks ago'),
    ),
    (  # Do not split URLs
        rf'({delim[:-1]})? (?P<label>\[\S+\] )?(?P<url>https?({delim})?:({delim})?/({delim})?/(({delim})?\S+)+)\s',
        lambda mobj: ''.join((delim, mobj.group('label') or '', re.sub(r'\s+', '', mobj.group('url')), '\n')),
    ),
    (  # Do not split "words"
        rf'(?m)({delim}\S+)+$',
        lambda mobj: ''.join((delim, mobj.group(0).replace(delim, ''))),
    ),
    (  # Allow overshooting last line
        rf'(?m)^(?P<prev>.+)${delim}(?P<current>.+)$(?!{delim})',
        lambda mobj: (mobj.group().replace(delim, ' ')
                      if len(mobj.group()) - len(delim) + 1 <= max_width + ALLOWED_OVERSHOOT
                      else mobj.group()),
    ),
    (  # Avoid newline when a space is available b/w switch and description
        DISABLE_PATCH,  # This creates issues with prepare_manpage
        r'(?m)^(\s{4}-.{%d})(%s)' % (switch_col_width - 6, delim),
        r'\1 ',
    ),
    (  # Replace brackets with a Markdown link
        r'SponsorBlock API \((http.+)\)',
        r'[SponsorBlock API](\1)',
    ),
)

readme = read_file(README_FILE)

write_file(README_FILE, ''.join((
    take_section(readme, end=f'## {OPTIONS_START}'),
    functools.reduce(apply_patch, PATCHES, options),
    take_section(readme, f'# {OPTIONS_END}'),
)))


yt-dlp-2024.09.27/devscripts/make_supportedsites.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from devscripts.utils import get_filename_args, write_file
from yt_dlp.extractor import list_extractor_classes


def main():
    out = '\n'.join(ie.description() for ie in list_extractor_classes() if ie.IE_DESC is not False)
    write_file(get_filename_args(), f'# Supported sites\n{out}\n')


if __name__ == '__main__':
    main()


yt-dlp-2024.09.27/devscripts/prepare_manpage.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import os.path
import re

from devscripts.utils import (
    compose_functions,
    get_filename_args,
    read_file,
    write_file,
)

ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
README_FILE = os.path.join(ROOT_DIR, 'README.md')

PREFIX = r'''%yt-dlp(1)

# NAME

yt\-dlp \- A feature\-rich command\-line audio/video downloader

# SYNOPSIS

**yt-dlp** \[OPTIONS\] URL [URL...]

# DESCRIPTION

'''
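
# Editorial note (added commentary, not from the original file): the manpage
# is produced by running README.md through the four transforms defined below,
# composed left-to-right via compose_functions (see TRANSFORM further down):
# filter_excluded_sections -> convert_code_blocks -> move_sections ->
# filter_options.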

def filter_excluded_sections(readme):
    EXCLUDED_SECTION_BEGIN_STRING = re.escape('<!-- MANPAGE: BEGIN EXCLUDED SECTION -->')
    EXCLUDED_SECTION_END_STRING = re.escape('<!-- MANPAGE: END EXCLUDED SECTION -->')
    return re.sub(
        rf'(?s){EXCLUDED_SECTION_BEGIN_STRING}.+?{EXCLUDED_SECTION_END_STRING}\n',
        '', readme)


def _convert_code_blocks(readme):
    current_code_block = None

    for line in readme.splitlines(True):
        if current_code_block:
            if line == current_code_block:
                current_code_block = None
                yield '\n'
            else:
                yield f'    {line}'
        elif line.startswith('```'):
            current_code_block = line.count('`') * '`' + '\n'
            yield '\n'
        else:
            yield line


def convert_code_blocks(readme):
    return ''.join(_convert_code_blocks(readme))


def move_sections(readme):
    MOVE_TAG_TEMPLATE = '<!-- MANPAGE: MOVE "%s" SECTION HERE -->'
    sections = re.findall(r'(?m)^%s$' % (
        re.escape(MOVE_TAG_TEMPLATE).replace(r'\%', '%') % '(.+)'), readme)

    for section_name in sections:
        move_tag = MOVE_TAG_TEMPLATE % section_name
        if readme.count(move_tag) > 1:
            raise Exception(f'There is more than one occurrence of "{move_tag}". This is unexpected')

        sections = re.findall(rf'(?sm)(^# {re.escape(section_name)}.+?)(?=^# )', readme)
        if len(sections) < 1:
            raise Exception(f'The section {section_name} does not exist')
        elif len(sections) > 1:
            raise Exception(f'There are multiple occurrences of section {section_name}, this is unhandled')

        readme = readme.replace(sections[0], '', 1).replace(move_tag, sections[0], 1)
    return readme


def filter_options(readme):
    section = re.search(r'(?sm)^# USAGE AND OPTIONS\n.+?(?=^# )', readme).group(0)
    section_new = section.replace('*', R'\*')

    options = '# OPTIONS\n'
    for line in section_new.split('\n')[1:]:
        mobj = re.fullmatch(r'''(?x)
                \s{4}(?P<opt>-(?:,\s|[^\s])+)
                (?:\s(?P<meta>(?:[^\s]|\s(?!\s))+))?
                (\s{2,}(?P<desc>.+))?
            ''', line)
        if not mobj:
            options += f'{line.lstrip()}\n'
            continue
        option, metavar, description = mobj.group('opt', 'meta', 'desc')
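        # Illustrative example (added commentary, not from the original file):
        # a README line such as
        #     -h, --help                      Print this help text and exit
        # matches with opt='-h, --help', meta=None and desc='Print this help
        # text and exit', and is emitted below as a pandoc definition list item.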
        # Pandoc's definition_lists. See http://pandoc.org/README.html
        option = f'{option} *{metavar}*' if metavar else option
        description = f'{description}\n' if description else ''
        options += f'\n{option}\n:   {description}'
        continue

    return readme.replace(section, options, 1)


TRANSFORM = compose_functions(filter_excluded_sections, convert_code_blocks, move_sections, filter_options)


def main():
    write_file(get_filename_args(), PREFIX + TRANSFORM(read_file(README_FILE)))


if __name__ == '__main__':
    main()


yt-dlp-2024.09.27/devscripts/run_tests.py

#!/usr/bin/env python3

import argparse
import functools
import os
import re
import shlex
import subprocess
import sys
from pathlib import Path

fix_test_name = functools.partial(re.compile(r'IE(_all|_\d+)?$').sub, r'\1')


def parse_args():
    parser = argparse.ArgumentParser(description='Run selected yt-dlp tests')
    parser.add_argument(
        'test', help='an extractor test, or one of "core" or "download"', nargs='*')
    parser.add_argument(
        '-k', help='run a test matching EXPRESSION. Same as "pytest -k"', metavar='EXPRESSION')
    parser.add_argument(
        '--pytest-args', help='arguments to passthrough to pytest')
    return parser.parse_args()


def run_tests(*tests, pattern=None, ci=False):
    run_core = 'core' in tests or (not pattern and not tests)
    run_download = 'download' in tests

    tests = list(map(fix_test_name, tests))

    pytest_args = args.pytest_args or os.getenv('HATCH_TEST_ARGS', '')
    arguments = ['pytest', '-Werror', '--tb=short', *shlex.split(pytest_args)]
    if ci:
        arguments.append('--color=yes')
    if pattern:
        arguments.extend(['-k', pattern])
    if run_core:
        arguments.extend(['-m', 'not download'])
    elif run_download:
        arguments.extend(['-m', 'download'])
    else:
        arguments.extend(
            f'test/test_download.py::TestDownload::test_{test}' for test in tests)

    print(f'Running {arguments}', flush=True)
    try:
        return subprocess.call(arguments)
    except FileNotFoundError:
        pass

    arguments = [sys.executable, '-Werror', '-m', 'unittest']
    if pattern:
        arguments.extend(['-k', pattern])
    if run_core:
        print('"pytest" needs to be installed to run core tests', file=sys.stderr, flush=True)
        return 1
    elif run_download:
        arguments.append('test.test_download')
    else:
        arguments.extend(
            f'test.test_download.TestDownload.test_{test}' for test in tests)

    print(f'Running {arguments}', flush=True)
    return subprocess.call(arguments)


if __name__ == '__main__':
    try:
        args = parse_args()

        os.chdir(Path(__file__).parent.parent)

        sys.exit(run_tests(*args.test, pattern=args.k, ci=bool(os.getenv('CI'))))
    except KeyboardInterrupt:
        pass
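
# Illustrative usage (added commentary, not from the original file):
#     python devscripts/run_tests.py core       # core tests via pytest
#     python devscripts/run_tests.py Youtube    # TestDownload.test_Youtube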

yt-dlp-2024.09.27/devscripts/set-variant.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import argparse
import functools
import re

from devscripts.utils import compose_functions, read_file, write_file

VERSION_FILE = 'yt_dlp/version.py'


def parse_options():
    parser = argparse.ArgumentParser(description='Set the build variant of the package')
    parser.add_argument('variant', help='Name of the variant')
    parser.add_argument('-M', '--update-message', default=None, help='Message to show in -U')
    return parser.parse_args()


def property_setter(name, value):
    return functools.partial(re.sub, rf'(?m)^{name}\s*=\s*.+$', f'{name} = {value!r}')


opts = parse_options()
transform = compose_functions(
    property_setter('VARIANT', opts.variant),
    property_setter('UPDATE_HINT', opts.update_message),
)

write_file(VERSION_FILE, transform(read_file(VERSION_FILE)))


yt-dlp-2024.09.27/devscripts/tomlparse.py

#!/usr/bin/env python3

"""
Simple parser for spec compliant toml files

A simple toml parser for files that comply with the spec.
Should only be used to parse `pyproject.toml` for `install_deps.py`.

IMPORTANT: INVALID FILES OR MULTILINE STRINGS ARE NOT SUPPORTED!
"""
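
# Illustrative example (added commentary, not from the original file),
# assuming a minimal document of the kind this parser targets:
#
#     parse_toml('[project]\nname = "yt-dlp"\nrequires-python = ">=3.9"')
#     # -> {'project': {'name': 'yt-dlp', 'requires-python': '>=3.9'}}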
""" from __future__ import annotations import datetime as dt import json import re WS = r'(?:[\ \t]*)' STRING_RE = re.compile(r'"(?:\\.|[^\\"\n])*"|\'[^\'\n]*\'') SINGLE_KEY_RE = re.compile(rf'{STRING_RE.pattern}|[A-Za-z0-9_-]+') KEY_RE = re.compile(rf'{WS}(?:{SINGLE_KEY_RE.pattern}){WS}(?:\.{WS}(?:{SINGLE_KEY_RE.pattern}){WS})*') EQUALS_RE = re.compile(rf'={WS}') WS_RE = re.compile(WS) _SUBTABLE = rf'(?P<subtable>^\[(?P<is_list>\[)?(?P<path>{KEY_RE.pattern})\]\]?)' EXPRESSION_RE = re.compile(rf'^(?:{_SUBTABLE}|{KEY_RE.pattern}=)', re.MULTILINE) LIST_WS_RE = re.compile(rf'{WS}((#[^\n]*)?\n{WS})*') LEFTOVER_VALUE_RE = re.compile(r'[^,}\]\t\n#]+') def parse_key(value: str): for match in SINGLE_KEY_RE.finditer(value): if match[0][0] == '"': yield json.loads(match[0]) elif match[0][0] == '\'': yield match[0][1:-1] else: yield match[0] def get_target(root: dict, paths: list[str], is_list=False): target = root for index, key in enumerate(paths, 1): use_list = is_list and index == len(paths) result = target.get(key) if result is None: result = [] if use_list else {} target[key] = result if isinstance(result, dict): target = result elif use_list: target = {} result.append(target) else: target = result[-1] assert isinstance(target, dict) return target def parse_enclosed(data: str, index: int, end: str, ws_re: re.Pattern): index += 1 if match := ws_re.match(data, index): index = match.end() while data[index] != end: index = yield True, index if match := ws_re.match(data, index): index = match.end() if data[index] == ',': index += 1 if match := ws_re.match(data, index): index = match.end() assert data[index] == end yield False, index + 1 def parse_value(data: str, index: int): if data[index] == '[': result = [] indices = parse_enclosed(data, index, ']', LIST_WS_RE) valid, index = next(indices) while valid: index, value = parse_value(data, index) result.append(value) valid, index = indices.send(index) return index, result if data[index] == '{': result = {} indices = parse_enclosed(data, index, '}', WS_RE) valid, index = next(indices) while valid: valid, index = indices.send(parse_kv_pair(data, index, result)) return index, result if match := STRING_RE.match(data, index): return match.end(), json.loads(match[0]) if match[0][0] == '"' else match[0][1:-1] match = LEFTOVER_VALUE_RE.match(data, index) assert match value = match[0].strip() for func in [ int, float, dt.time.fromisoformat, dt.date.fromisoformat, dt.datetime.fromisoformat, {'true': True, 'false': False}.get, ]: try: value = func(value) break except Exception: pass return match.end(), value def parse_kv_pair(data: str, index: int, target: dict): match = KEY_RE.match(data, index) if not match: return None *keys, key = parse_key(match[0]) match = EQUALS_RE.match(data, match.end()) assert match index = match.end() index, value = parse_value(data, index) get_target(target, keys)[key] = value return index def parse_toml(data: str): root = {} target = root index = 0 while True: match = EXPRESSION_RE.search(data, index) if not match: break if match.group('subtable'): index = match.end() path, is_list = match.group('path', 'is_list') target = get_target(root, list(parse_key(path)), bool(is_list)) continue index = parse_kv_pair(data, match.start(), target) assert index is not None return root def main(): import argparse from pathlib import Path parser = argparse.ArgumentParser() parser.add_argument('infile', type=Path, help='The TOML file to read as input') args = parser.parse_args() with args.infile.open('r', encoding='utf-8') as file: data = 
yt-dlp-2024.09.27/devscripts/update-version.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import argparse
import contextlib
import datetime as dt
import sys

from devscripts.utils import read_version, run_process, write_file


def get_new_version(version, revision):
    if not version:
        version = dt.datetime.now(dt.timezone.utc).strftime('%Y.%m.%d')

    if revision:
        assert revision.isdecimal(), 'Revision must be a number'
    else:
        old_version = read_version().split('.')
        if version.split('.') == old_version[:3]:
            revision = str(int(([*old_version, 0])[3]) + 1)

    return f'{version}.{revision}' if revision else version


def get_git_head():
    with contextlib.suppress(Exception):
        return run_process('git', 'rev-parse', 'HEAD').stdout.strip()


VERSION_TEMPLATE = '''\
# Autogenerated by devscripts/update-version.py
__version__ = {version!r}
RELEASE_GIT_HEAD = {git_head!r}
VARIANT = None
UPDATE_HINT = None
CHANNEL = {channel!r}
ORIGIN = {origin!r}
_pkg_version = {package_version!r}
'''

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Update the version.py file')
    parser.add_argument(
        '-c', '--channel', default='stable',
        help='Select update channel (default: %(default)s)')
    parser.add_argument(
        '-r', '--origin', default='local',
        help='Select origin/repository (default: %(default)s)')
    parser.add_argument(
        '-s', '--suffix', default='',
        help='Add an alphanumeric suffix to the package version, e.g. "dev"')
    parser.add_argument(
        '-o', '--output', default='yt_dlp/version.py',
        help='The output file to write to (default: %(default)s)')
    parser.add_argument(
        'version', nargs='?', default=None,
        help='A version or revision to use instead of generating one')
    args = parser.parse_args()

    git_head = get_git_head()
    version = (
        args.version if args.version and '.' in args.version
        else get_new_version(None, args.version))
    write_file(args.output, VERSION_TEMPLATE.format(
        version=version, git_head=git_head, channel=args.channel, origin=args.origin,
        package_version=f'{version}{args.suffix}'))
    print(f'version={version} ({args.channel}), head={git_head}')
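A short illustration of the version scheme `get_new_version` produces: versions are the UTC date in `%Y.%m.%d` form, with an optional numeric revision appended for same-day re-releases. This sketches only the string handling, with an illustrative revision value, not a call into the script:

```python
import datetime as dt

version = dt.datetime.now(dt.timezone.utc).strftime('%Y.%m.%d')
revision = '1'  # illustrative; normally derived from the previous version
print(f'{version}.{revision}' if revision else version)  # e.g. 2024.09.27.1
```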
yt-dlp-2024.09.27/devscripts/update_changelog.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from pathlib import Path

from devscripts.make_changelog import create_changelog, create_parser
from devscripts.utils import read_file, read_version, write_file

# Always run after devscripts/update-version.py, and run before `make doc|pypi-files|tar|all`

if __name__ == '__main__':
    parser = create_parser()
    parser.description = 'Update an existing changelog file with an entry for a new release'
    parser.add_argument(
        '--changelog-path', type=Path, default=Path(__file__).parent.parent / 'Changelog.md',
        help='path to the Changelog file')
    args = parser.parse_args()
    new_entry = create_changelog(args)

    header, sep, changelog = read_file(args.changelog_path).partition('\n### ')
    write_file(args.changelog_path, f'{header}{sep}{read_version()}\n{new_entry}\n{sep}{changelog}')

yt-dlp-2024.09.27/devscripts/utils.py

import argparse
import functools
import subprocess


def read_file(fname):
    with open(fname, encoding='utf-8') as f:
        return f.read()


def write_file(fname, content, mode='w'):
    with open(fname, mode, encoding='utf-8') as f:
        return f.write(content)


def read_version(fname='yt_dlp/version.py', varname='__version__'):
    """Get the version without importing the package"""
    items = {}
    exec(compile(read_file(fname), fname, 'exec'), items)
    return items[varname]


def get_filename_args(has_infile=False, default_outfile=None):
    parser = argparse.ArgumentParser()
    if has_infile:
        parser.add_argument('infile', help='Input file')
    kwargs = {'nargs': '?', 'default': default_outfile} if default_outfile else {}
    parser.add_argument('outfile', **kwargs, help='Output file')

    opts = parser.parse_args()
    if has_infile:
        return opts.infile, opts.outfile
    return opts.outfile


def compose_functions(*functions):
    return lambda x: functools.reduce(lambda y, f: f(y), functions, x)


def run_process(*args, **kwargs):
    kwargs.setdefault('text', True)
    kwargs.setdefault('check', True)
    kwargs.setdefault('capture_output', True)
    if kwargs['text']:
        kwargs.setdefault('encoding', 'utf-8')
        kwargs.setdefault('errors', 'replace')
    return subprocess.run(args, **kwargs)
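`utils.py` is shared by most of the devscripts above. Its `compose_functions` chains single-argument transformations left to right, which is how `make_readme.py` and `set-variant.py` build their `TRANSFORM`/`transform` pipelines. A quick sketch with made-up functions:

```python
import functools

def compose_functions(*functions):
    # compose_functions(f, g)(x) is g(f(x)): functions apply left to right
    return lambda x: functools.reduce(lambda y, f: f(y), functions, x)

upper_then_strip = compose_functions(str.upper, str.strip)
assert upper_then_strip('  hello ') == 'HELLO'
```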
yt-dlp-2024.09.27/devscripts/zsh-completion.in

#compdef yt-dlp

__yt_dlp() {
    local curcontext="$curcontext" fileopts diropts cur prev
    typeset -A opt_args
    fileopts="{{fileopts}}"
    diropts="{{diropts}}"
    cur=$words[CURRENT]
    case $cur in
        :)
            _arguments '*: :(::ytfavorites ::ytrecommended ::ytsubscriptions ::ytwatchlater ::ythistory)'
        ;;
        *)
            prev=$words[CURRENT-1]
            if [[ ${prev} =~ ${fileopts} ]]; then
                _path_files
            elif [[ ${prev} =~ ${diropts} ]]; then
                _path_files -/
            elif [[ ${prev} == "--remux-video" ]]; then
                _arguments '*: :(mp4 mkv)'
            elif [[ ${prev} == "--recode-video" ]]; then
                _arguments '*: :(mp4 flv ogg webm mkv)'
            else
                _arguments '*: :({{flags}})'
            fi
        ;;
    esac
}

__yt_dlp

yt-dlp-2024.09.27/devscripts/zsh-completion.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import yt_dlp

ZSH_COMPLETION_FILE = 'completions/zsh/_yt-dlp'
ZSH_COMPLETION_TEMPLATE = 'devscripts/zsh-completion.in'


def build_completion(opt_parser):
    opts = [opt for group in opt_parser.option_groups for opt in group.option_list]
    opts_file = [opt for opt in opts if opt.metavar == 'FILE']
    opts_dir = [opt for opt in opts if opt.metavar == 'DIR']

    fileopts = []
    for opt in opts_file:
        if opt._short_opts:
            fileopts.extend(opt._short_opts)
        if opt._long_opts:
            fileopts.extend(opt._long_opts)

    diropts = []
    for opt in opts_dir:
        if opt._short_opts:
            diropts.extend(opt._short_opts)
        if opt._long_opts:
            diropts.extend(opt._long_opts)

    flags = [opt.get_opt_string() for opt in opts]

    with open(ZSH_COMPLETION_TEMPLATE) as f:
        template = f.read()

    template = template.replace('{{fileopts}}', '|'.join(fileopts))
    template = template.replace('{{diropts}}', '|'.join(diropts))
    template = template.replace('{{flags}}', ' '.join(flags))

    with open(ZSH_COMPLETION_FILE, 'w') as f:
        f.write(template)


parser = yt_dlp.parseOpts(ignore_config_files=True)[0]
build_completion(parser)
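`zsh-completion.py` fills the `{{fileopts}}`, `{{diropts}}` and `{{flags}}` placeholders in the template with `|`- and space-joined option strings. A minimal sketch of that substitution; the flag values are illustrative, the real lists come from `yt_dlp.parseOpts()`:

```python
template = "_arguments '*: :({{flags}})'"
flags = ['--help', '--version', '--update']  # made-up subset
print(template.replace('{{flags}}', ' '.join(flags)))
# _arguments '*: :(--help --version --update)'
```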
yt-dlp-2024.09.27/public.key

-----BEGIN PGP PUBLIC KEY BLOCK-----

mQINBGP78C4BEAD0rF9zjGPAt0thlt5C1ebzccAVX7Nb1v+eqQjk+WEZdTETVCg3
WAM5ngArlHdm/fZqzUgO+pAYrB60GKeg7ffUDf+S0XFKEZdeRLYeAaqqKhSibVal
DjvOBOztu3W607HLETQAqA7wTPuIt2WqmpL60NIcyr27LxqmgdN3mNvZ2iLO+bP0
nKR/C+PgE9H4ytywDa12zMx6PmZCnVOOOu6XZEFmdUxxdQ9fFDqd9LcBKY2LDOcS
Yo1saY0YWiZWHtzVoZu1kOzjnS5Fjq/yBHJLImDH7pNxHm7s/PnaurpmQFtDFruk
t+2lhDnpKUmGr/I/3IHqH/X+9nPoS4uiqQ5HpblB8BK+4WfpaiEg75LnvuOPfZIP
KYyXa/0A7QojMwgOrD88ozT+VCkKkkJ+ijXZ7gHNjmcBaUdKK7fDIEOYI63Lyc6Q
WkGQTigFffSUXWHDCO9aXNhP3ejqFWgGMtCUsrbkcJkWuWY7q5ARy/05HbSM3K4D
U9eqtnxmiV1WQ8nXuI9JgJQRvh5PTkny5LtxqzcmqvWO9TjHBbrs14BPEO9fcXxK
L/CFBbzXDSvvAgArdqqlMoncQ/yicTlfL6qzJ8EKFiqW14QMTdAn6SuuZTodXCTi
InwoT7WjjuFPKKdvfH1GP4bnqdzTnzLxCSDIEtfyfPsIX+9GI7Jkk/zZjQARAQAB
tDdTaW1vbiBTYXdpY2tpICh5dC1kbHAgc2lnbmluZyBrZXkpIDxjb250YWN0QGdy
dWI0ay54eXo+iQJOBBMBCgA4FiEErAy75oSNaoc0ZK9OV89lkztadYEFAmP78C4C
GwMFCwkIBwIGFQoJCAsCBBYCAwECHgECF4AACgkQV89lkztadYEVqQ//cW7TxhXg
7Xbh2EZQzXml0egn6j8QaV9KzGragMiShrlvTO2zXfLXqyizrFP4AspgjSn/4NrI
8mluom+Yi+qr7DXT4BjQqIM9y3AjwZPdywe912Lxcw52NNoPZCm24I9T7ySc8lmR
FQvZC0w4H/VTNj/2lgJ1dwMflpwvNRiWa5YzcFGlCUeDIPskLx9++AJE+xwU3LYm
jQQsPBqpHHiTBEJzMLl+rfd9Fg4N+QNzpFkTDW3EPerLuvJniSBBwZthqxeAtw4M
UiAXh6JvCc2hJkKCoygRfM281MeolvmsGNyQm+axlB0vyldiPP6BnaRgZlx+l6MU
cPqgHblb7RW5j9lfr6OYL7SceBIHNv0CFrt1OnkGo/tVMwcs8LH3Ae4a7UJlIceL
V54aRxSsZU7w4iX+PB79BWkEsQzwKrUuJVOeL4UDwWajp75OFaUqbS/slDDVXvK5
OIeuth3mA/adjdvgjPxhRQjA3l69rRWIJDrqBSHldmRsnX6cvXTDy8wSXZgy51lP
m4IVLHnCy9m4SaGGoAsfTZS0cC9FgjUIyTyrq9M67wOMpUxnuB0aRZgJE1DsI23E
qdvcSNVlO+39xM/KPWUEh6b83wMn88QeW+DCVGWACQq5N3YdPnAJa50617fGbY6I
gXIoRHXkDqe23PZ/jURYCv0sjVtjPoVC+bg=
=bJkn
-----END PGP PUBLIC KEY BLOCK-----

yt-dlp-2024.09.27/pyproject.toml

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "yt-dlp"
maintainers = [
    {name = "pukkandan", email = "pukkandan.ytdlp@gmail.com"},
    {name = "Grub4K", email = "contact@grub4k.xyz"},
    {name = "bashonly", email = "bashonly@protonmail.com"},
    {name = "coletdjnz", email = "coletdjnz@protonmail.com"},
"coletdjnz@protonmail.com"}, {name = "sepro", email = "sepro@sepr0.com"}, ] description = "A feature-rich command-line audio/video downloader" readme = "README.md" requires-python = ">=3.8" keywords = [ "youtube-dl", "video-downloader", "youtube-downloader", "sponsorblock", "youtube-dlc", "yt-dlp", ] license = {file = "LICENSE"} classifiers = [ "Topic :: Multimedia :: Video", "Development Status :: 5 - Production/Stable", "Environment :: Console", "Programming Language :: Python", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: Implementation", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "License :: OSI Approved :: The Unlicense (Unlicense)", "Operating System :: OS Independent", ] dynamic = ["version"] dependencies = [ "brotli; implementation_name=='cpython'", "brotlicffi; implementation_name!='cpython'", "certifi", "mutagen", "pycryptodomex", "requests>=2.32.2,<3", "urllib3>=1.26.17,<3", "websockets>=13.0", ] [project.optional-dependencies] default = [] curl-cffi = [ "curl-cffi==0.5.10; os_name=='nt' and implementation_name=='cpython'", "curl-cffi>=0.5.10,!=0.6.*,<0.7.2; os_name!='nt' and implementation_name=='cpython'", ] secretstorage = [ "cffi", "secretstorage", ] build = [ "build", "hatchling", "pip", "setuptools>=71.0.2", # 71.0.0 broke pyinstaller "wheel", ] dev = [ "pre-commit", "yt-dlp[static-analysis]", "yt-dlp[test]", ] static-analysis = [ "autopep8~=2.0", "ruff~=0.6.0", ] test = [ "pytest~=8.1", ] pyinstaller = [ "pyinstaller>=6.10.0", # Windows temp cleanup fixed in 6.10.0 ] py2exe = [ "py2exe>=0.12", ] [project.urls] Documentation = "https://github.com/yt-dlp/yt-dlp#readme" Repository = "https://github.com/yt-dlp/yt-dlp" Tracker = "https://github.com/yt-dlp/yt-dlp/issues" Funding = "https://github.com/yt-dlp/yt-dlp/blob/master/Collaborators.md#collaborators" [project.scripts] yt-dlp = "yt_dlp:main" [project.entry-points.pyinstaller40] hook-dirs = "yt_dlp.__pyinstaller:get_hook_dirs" [tool.hatch.build.targets.sdist] include = [ "/yt_dlp", "/devscripts", "/test", "/.gitignore", # included by default, needed for auto-excludes "/Changelog.md", "/LICENSE", # included as license "/pyproject.toml", # included by default "/README.md", # included as readme "/setup.cfg", "/supportedsites.md", ] artifacts = [ "/yt_dlp/extractor/lazy_extractors.py", "/completions", "/AUTHORS", # included by default "/README.txt", "/yt-dlp.1", ] [tool.hatch.build.targets.wheel] packages = ["yt_dlp"] artifacts = ["/yt_dlp/extractor/lazy_extractors.py"] [tool.hatch.build.targets.wheel.shared-data] "completions/bash/yt-dlp" = "share/bash-completion/completions/yt-dlp" "completions/zsh/_yt-dlp" = "share/zsh/site-functions/_yt-dlp" "completions/fish/yt-dlp.fish" = "share/fish/vendor_completions.d/yt-dlp.fish" "README.txt" = "share/doc/yt_dlp/README.txt" "yt-dlp.1" = "share/man/man1/yt-dlp.1" [tool.hatch.version] path = "yt_dlp/version.py" pattern = "_pkg_version = '(?P<version>[^']+)'" [tool.hatch.envs.default] features = ["curl-cffi", "default"] dependencies = ["pre-commit"] path = ".venv" installer = "uv" [tool.hatch.envs.default.scripts] setup = "pre-commit install --config .pre-commit-hatch.yaml" yt-dlp = "python -Werror -Xdev -m yt_dlp {args}" [tool.hatch.envs.hatch-static-analysis] 
[tool.hatch.envs.hatch-static-analysis]
detached = true
features = ["static-analysis"]
dependencies = []  # override hatch ruff version
config-path = "pyproject.toml"

[tool.hatch.envs.hatch-static-analysis.scripts]
format-check = "autopep8 --diff {args:.}"
format-fix = "autopep8 --in-place {args:.}"
lint-check = "ruff check {args:.}"
lint-fix = "ruff check --fix {args:.}"

[tool.hatch.envs.hatch-test]
features = ["test"]
dependencies = [
    "pytest-randomly~=3.15",
    "pytest-rerunfailures~=14.0",
    "pytest-xdist[psutil]~=3.5",
]

[tool.hatch.envs.hatch-test.scripts]
run = "python -m devscripts.run_tests {args}"
run-cov = "echo Code coverage not implemented && exit 1"

[[tool.hatch.envs.hatch-test.matrix]]
python = [
    "3.8",
    "3.9",
    "3.10",
    "3.11",
    "3.12",
    "pypy3.8",
    "pypy3.9",
    "pypy3.10",
]

[tool.ruff]
line-length = 120

[tool.ruff.lint]
ignore = [
    "E402",     # module-import-not-at-top-of-file
    "E501",     # line-too-long
    "E731",     # lambda-assignment
    "E741",     # ambiguous-variable-name
    "UP036",    # outdated-version-block
    "B006",     # mutable-argument-default
    "B008",     # function-call-in-default-argument
    "B011",     # assert-false
    "B017",     # assert-raises-exception
    "B023",     # function-uses-loop-variable (false positives)
    "B028",     # no-explicit-stacklevel
    "B904",     # raise-without-from-inside-except
    "C401",     # unnecessary-generator-set
    "C402",     # unnecessary-generator-dict
    "PIE790",   # unnecessary-placeholder
    "SIM102",   # collapsible-if
    "SIM108",   # if-else-block-instead-of-if-exp
    "SIM112",   # uncapitalized-environment-variables
    "SIM113",   # enumerate-for-loop
    "SIM114",   # if-with-same-arms
    "SIM115",   # open-file-with-context-handler
    "SIM117",   # multiple-with-statements
    "SIM223",   # expr-and-false
    "SIM300",   # yoda-conditions
    "TD001",    # invalid-todo-tag
    "TD002",    # missing-todo-author
    "TD003",    # missing-todo-link
    "PLE0604",  # invalid-all-object (false positives)
    "PLE0643",  # potential-index-error (false positives)
    "PLW0603",  # global-statement
    "PLW1510",  # subprocess-run-without-check
    "PLW2901",  # redefined-loop-name
    "RUF001",   # ambiguous-unicode-character-string
    "RUF012",   # mutable-class-default
    "RUF100",   # unused-noqa (flake8 has slightly different behavior)
]
select = [
    "E",       # pycodestyle Error
    "W",       # pycodestyle Warning
    "F",       # Pyflakes
    "I",       # isort
    "Q",       # flake8-quotes
    "N803",    # invalid-argument-name
    "N804",    # invalid-first-argument-name-for-class-method
    "UP",      # pyupgrade
    "B",       # flake8-bugbear
    "A",       # flake8-builtins
    "COM",     # flake8-commas
    "C4",      # flake8-comprehensions
    "FA",      # flake8-future-annotations
    "ISC",     # flake8-implicit-str-concat
    "ICN003",  # banned-import-from
    "PIE",     # flake8-pie
    "T20",     # flake8-print
    "RSE",     # flake8-raise
    "RET504",  # unnecessary-assign
    "SIM",     # flake8-simplify
    "TID251",  # banned-api
    "TD",      # flake8-todos
    "PLC",     # Pylint Convention
    "PLE",     # Pylint Error
    "PLW",     # Pylint Warning
    "RUF",     # Ruff-specific rules
]

[tool.ruff.lint.per-file-ignores]
"devscripts/lazy_load_template.py" = [
    "F401",  # unused-import
]
"!yt_dlp/extractor/**.py" = [
    "I",       # isort
    "ICN003",  # banned-import-from
    "T20",     # flake8-print
    "A002",    # builtin-argument-shadowing
    "C408",    # unnecessary-collection-call
]
"yt_dlp/jsinterp.py" = [
    "UP031",  # printf-string-formatting
]

[tool.ruff.lint.isort]
known-first-party = [
    "bundle",
    "devscripts",
    "test",
]
relative-imports-order = "closest-to-furthest"

[tool.ruff.lint.flake8-quotes]
docstring-quotes = "double"
multiline-quotes = "single"
inline-quotes = "single"
avoid-escape = false

[tool.ruff.lint.pep8-naming]
classmethod-decorators = [
    "yt_dlp.utils.classproperty",
]

[tool.ruff.lint.flake8-import-conventions]
banned-from = [
    "base64",
    "datetime",
"functools", "glob", "hashlib", "itertools", "json", "math", "os", "pathlib", "random", "re", "string", "sys", "time", "urllib.parse", "uuid", "xml", ] [tool.ruff.lint.flake8-tidy-imports.banned-api] "yt_dlp.compat.compat_str".msg = "Use `str` instead." "yt_dlp.compat.compat_b64decode".msg = "Use `base64.b64decode` instead." "yt_dlp.compat.compat_urlparse".msg = "Use `urllib.parse` instead." "yt_dlp.compat.compat_parse_qs".msg = "Use `urllib.parse.parse_qs` instead." "yt_dlp.compat.compat_urllib_parse_unquote".msg = "Use `urllib.parse.unquote` instead." "yt_dlp.compat.compat_urllib_parse_urlencode".msg = "Use `urllib.parse.urlencode` instead." "yt_dlp.compat.compat_urllib_parse_urlparse".msg = "Use `urllib.parse.urlparse` instead." "yt_dlp.compat.compat_shlex_quote".msg = "Use `yt_dlp.utils.shell_quote` instead." "yt_dlp.utils.error_to_compat_str".msg = "Use `str` instead." [tool.autopep8] max_line_length = 120 recursive = true exit-code = true jobs = 0 select = [ "E101", "E112", "E113", "E115", "E116", "E117", "E121", "E122", "E123", "E124", "E125", "E126", "E127", "E128", "E129", "E131", "E201", "E202", "E203", "E211", "E221", "E222", "E223", "E224", "E225", "E226", "E227", "E228", "E231", "E241", "E242", "E251", "E252", "E261", "E262", "E265", "E266", "E271", "E272", "E273", "E274", "E275", "E301", "E302", "E303", "E304", "E305", "E306", "E502", "E701", "E702", "E704", "W391", "W504", ] [tool.pytest.ini_options] addopts = "-ra -v --strict-markers" markers = [ "download", ] ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/setup.cfg�������������������������������������������������������������������������0000664�0000000�0000000�00000001535�14675634471�0015200�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������[flake8] exclude = build,venv,.tox,.git,.pytest_cache ignore = E402,E501,E731,E741,W503 max_line_length = 120 per_file_ignores = devscripts/lazy_load_template.py: F401 [autoflake] ignore-init-module-imports = true ignore-pass-after-docstring = true remove-all-unused-imports = true remove-duplicate-keys = true remove-unused-variables = true [tox:tox] skipsdist = true envlist = py{38,39,310,311,312},pypy{38,39,310} skip_missing_interpreters = true [testenv] # tox deps = pytest commands = pytest {posargs:"-m not download"} passenv = HOME # For test_compat_expanduser setenv = # PYTHONWARNINGS = error # Catches PIP's warnings too [isort] py_version = 38 multi_line_output = VERTICAL_HANGING_INDENT line_length = 80 reverse_relative = true ensure_newline_before_comments = true include_trailing_comma = true known_first_party = test 
yt-dlp-2024.09.27/supportedsites.md

# Supported sites
 - **17live**
 - **17live:clip**
 - **1News**: 1news.co.nz article videos
 - **1tv**: Первый канал
 - **20min**
 - **23video**
 - **247sports**: (**Currently broken**)
 - **24tv.ua**
 - **3qsdn**: 3Q SDN
 - **3sat**
 - **4tube**
 - **56.com**
 - **6play**
 - **7plus**
 - **8tracks**
 - **9c9media**
 - **9gag**: 9GAG
 - **9News**
 - **9now.com.au**
 - **abc.net.au**
 - **abc.net.au:iview**
 - **abc.net.au:​iview:showseries**
 - **abcnews**
 - **abcnews:video**
 - **abcotvs**: ABC Owned Television Stations
 - **abcotvs:clips**
 - **AbemaTV**: [*abematv*](## "netrc machine")
 - **AbemaTVTitle**: [*abematv*](## "netrc machine")
 - **AcademicEarth:Course**
 - **acast**
 - **acast:channel**
 - **AcFunBangumi**
 - **AcFunVideo**
 - **ADN**: [*animationdigitalnetwork*](## "netrc machine") Animation Digital Network
 - **ADNSeason**: [*animationdigitalnetwork*](## "netrc machine") Animation Digital Network
 - **AdobeConnect**
 - **adobetv**
 - **adobetv:channel**
 - **adobetv:embed**
 - **adobetv:show**
 - **adobetv:video**
 - **AdultSwim**
 - **aenetworks**: A+E Networks: A&E, Lifetime, History.com, FYI Network and History Vault
 - **aenetworks:collection**
 - **aenetworks:show**
 - **AeonCo**
 - **afreecatv**: [*afreecatv*](## "netrc machine") afreecatv.com
 - **afreecatv:catchstory**: [*afreecatv*](## "netrc machine") afreecatv.com catch story
 - **afreecatv:live**: [*afreecatv*](## "netrc machine") afreecatv.com livestreams
 - **afreecatv:user**
 - **AirTV**
 - **AitubeKZVideo**
 - **AliExpressLive**
 - **AlJazeera**
 - **Allocine**
 - **Allstar**
 - **AllstarProfile**
 - **AlphaPorno**
 - **Alsace20TV**
 - **Alsace20TVEmbed**
 - **altcensored**
 - **altcensored:channel**
 - **Alura**: [*alura*](## "netrc machine")
 - **AluraCourse**: [*aluracourse*](## "netrc machine")
 - **AmadeusTV**
 - **Amara**
 - **AmazonMiniTV**
 - **amazonminitv:season**: Amazon MiniTV Season, "minitv:season:" prefix
 - **amazonminitv:series**: Amazon MiniTV Series, "minitv:series:" prefix
 - **AmazonReviews**
 - **AmazonStore**
 - **AMCNetworks**
 - **AmericasTestKitchen**
 - **AmericasTestKitchenSeason**
 - **AmHistoryChannel**
 - **AnchorFMEpisode**
 - **anderetijden**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
 - **Angel**
 - **AnimalPlanet**
 - **ant1newsgr:article**: ant1news.gr articles
 - **ant1newsgr:embed**: ant1news.gr embedded videos
 - **antenna:watch**: antenna.gr and ant1news.gr videos
 - **Anvato**
 - **aol.com**: Yahoo screen and movies (**Currently broken**)
 - **APA**
 - **Aparat**
 - **AppleConnect**
 - **AppleDaily**: 臺灣蘋果日報
 - **ApplePodcasts**
 - **appletrailers**
 - **appletrailers:section**
 - **archive.org**: archive.org video and audio
 - **ArcPublishing**
 - **ARD**
 - **ARDMediathek**
 - **ARDMediathekCollection**
 - **Arkena**
 - **Art19**
 - **Art19Show**
 - **arte.sky.it**
 - **ArteTV**
 - **ArteTVCategory**
 - **ArteTVEmbed**
 - **ArteTVPlaylist**
 - **asobichannel**: ASOBI CHANNEL
 - **asobichannel:tag**: ASOBI CHANNEL
 - **AsobiStage**: ASOBISTAGE (アソビステージ)
 - **AtresPlayer**: [*atresplayer*](## "netrc machine")
 - **AtScaleConfEvent**
 - **ATVAt**
 - **AudiMedia**
 - **AudioBoom**
 - **Audiodraft:custom**
 - **Audiodraft:generic**
 - **audiomack**
 - **audiomack:album**
 - **Audius**: Audius.co
 - **audius:artist**: Audius.co profile/artist pages
 - **audius:playlist**: Audius.co playlists
 - **audius:track**: Audius track ID or API link. Prepend with "audius:"
 - **AWAAN**
 - **awaan:live**
 - **awaan:season**
 - **awaan:video**
 - **axs.tv**
 - **AZMedien**: AZ Medien videos
 - **BaiduVideo**: 百度视频
 - **BanBye**
 - **BanByeChannel**
 - **bandaichannel**
 - **Bandcamp**
 - **Bandcamp:album**
 - **Bandcamp:user**
 - **Bandcamp:weekly**
 - **BannedVideo**
 - **bbc**: [*bbc*](## "netrc machine") BBC
 - **bbc.co.uk**: [*bbc*](## "netrc machine") BBC iPlayer
 - **bbc.co.uk:article**: BBC articles
 - **bbc.co.uk:​iplayer:episodes**
 - **bbc.co.uk:​iplayer:group**
 - **bbc.co.uk:playlist**
 - **BBVTV**: [*bbvtv*](## "netrc machine")
 - **BBVTVLive**: [*bbvtv*](## "netrc machine")
 - **BBVTVRecordings**: [*bbvtv*](## "netrc machine")
 - **BeaconTv**
 - **BeatBumpPlaylist**
 - **BeatBumpVideo**
 - **Beatport**
 - **Beeg**
 - **BehindKink**: (**Currently broken**)
 - **Bellator**
 - **BellMedia**
 - **BerufeTV**
 - **Bet**: (**Currently broken**)
 - **bfi:player**: (**Currently broken**)
 - **bfmtv**
 - **bfmtv:article**
 - **bfmtv:live**
 - **bibeltv:live**: BibelTV live program
 - **bibeltv:series**: BibelTV series playlist
 - **bibeltv:video**: BibelTV single video
 - **Bigflix**
 - **Bigo**
 - **Bild**: Bild.de
 - **BiliBili**
 - **Bilibili category extractor**
 - **BilibiliAudio**
 - **BilibiliAudioAlbum**
 - **BiliBiliBangumi**
 - **BiliBiliBangumiMedia**
 - **BiliBiliBangumiSeason**
 - **BilibiliCheese**
 - **BilibiliCheeseSeason**
 - **BilibiliCollectionList**
 - **BilibiliFavoritesList**
 - **BiliBiliPlayer**
 - **BilibiliPlaylist**
 - **BiliBiliSearch**: Bilibili video search; "bilisearch:" prefix
 - **BilibiliSeriesList**
 - **BilibiliSpaceAudio**
 - **BilibiliSpaceVideo**
 - **BilibiliWatchlater**
 - **BiliIntl**: [*biliintl*](## "netrc machine")
 - **biliIntl:series**: [*biliintl*](## "netrc machine")
 - **BiliLive**
 - **BioBioChileTV**
 - **Biography**
 - **BitChute**
 - **BitChuteChannel**
 - **BlackboardCollaborate**
 - **BleacherReport**: (**Currently broken**)
 - **BleacherReportCMS**: (**Currently broken**)
 - **blerp**
 - **blogger.com**
 - **Bloomberg**
 - **BokeCC**
 - **BongaCams**
 - **Boosty**
 - **BostonGlobe**
 - **Box**
 - **BoxCastVideo**
 - **Bpb**: Bundeszentrale für politische Bildung
 - **BR**: Bayerischer Rundfunk (**Currently broken**)
 - **BrainPOP**: [*brainpop*](## "netrc machine")
 - **BrainPOPELL**: [*brainpop*](## "netrc machine")
 - **BrainPOPEsp**: [*brainpop*](## "netrc machine") BrainPOP Español
 - **BrainPOPFr**: [*brainpop*](## "netrc machine") BrainPOP Français
 - **BrainPOPIl**: [*brainpop*](## "netrc machine") BrainPOP Hebrew
 - **BrainPOPJr**: [*brainpop*](## "netrc machine")
 - **BravoTV**
 - **BreitBart**
 - **brightcove:legacy**
 - **brightcove:new**
 - **Brilliantpala:Classes**: [*brilliantpala*](## "netrc machine") VoD on classes.brilliantpala.org
 - **Brilliantpala:Elearn**: [*brilliantpala*](## "netrc machine") VoD on elearn.brilliantpala.org
 - **bt:article**: Bergens Tidende Articles
 - **bt:vestlendingen**: Bergens Tidende - Vestlendingen
 - **Bundesliga**
 - **Bundestag**
 - **BusinessInsider**
 - **BuzzFeed**
 - **BYUtv**: (**Currently broken**)
 - **CaffeineTV**
 - **Callin**
 - **Caltrans**
 - **CAM4**
 - **Camdemy**
 - **CamdemyFolder**
 - **CamFMEpisode**
 - **CamFMShow**
 - **CamModels**
 - **Camsoda**
 - **CamtasiaEmbed**
 - **Canal1**
 - **CanalAlpha**
 - **canalc2.tv**
 - **Canalplus**: mycanal.fr and piwiplus.fr
 - **CaracolTvPlay**: [*caracoltv-play*](## "netrc machine")
 - **CartoonNetwork**
 - **cbc.ca**
 - **cbc.ca:player**
 - **cbc.ca:​player:playlist**
 - **CBS**: (**Currently broken**)
 - **CBSLocal**
 - **CBSLocalArticle**
 - **CBSLocalLive**
 - **cbsnews**: CBS News
 - **cbsnews:embed**
 - **cbsnews:live**: CBS News Livestream
 - **cbsnews:livevideo**: CBS News Live Videos
 - **cbssports**: (**Currently broken**)
 - **cbssports:embed**: (**Currently broken**)
 - **CCMA**
 - **CCTV**: 央视网
 - **CDA**: [*cdapl*](## "netrc machine")
 - **Cellebrite**
 - **CeskaTelevize**
 - **CGTN**
 - **CharlieRose**
 - **Chaturbate**
 - **Chilloutzone**
 - **chzzk:live**
 - **chzzk:video**
 - **cielotv.it**
 - **Cinemax**: (**Currently broken**)
 - **CinetecaMilano**
 - **Cineverse**
 - **CineverseDetails**
 - **CiscoLiveSearch**
 - **CiscoLiveSession**
 - **ciscowebex**: Cisco Webex
 - **CJSW**
 - **Clipchamp**
 - **Clippit**
 - **ClipRs**: (**Currently broken**)
 - **ClipYouEmbed**
 - **CloserToTruth**: (**Currently broken**)
 - **CloudflareStream**
 - **CloudyCDN**
 - **Clubic**: (**Currently broken**)
 - **Clyp**
 - **cmt.com**: (**Currently broken**)
 - **CNBCVideo**
 - **CNN**
 - **CNNArticle**
 - **CNNBlogs**
 - **CNNIndonesia**
 - **ComedyCentral**
 - **ComedyCentralTV**
 - **ConanClassic**
 - **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
 - **CONtv**
 - **CookingChannel**
 - **Corus**
 - **Coub**
 - **CozyTV**
 - **cp24**
 - **cpac**
 - **cpac:playlist**
 - **Cracked**
 - **Crackle**
 - **Craftsy**
 - **CrooksAndLiars**
 - **CrowdBunker**
 - **CrowdBunkerChannel**
 - **Crtvg**
 - **crunchyroll**: [*crunchyroll*](## "netrc machine")
 - **crunchyroll:artist**: [*crunchyroll*](## "netrc machine")
 - **crunchyroll:music**: [*crunchyroll*](## "netrc machine")
 - **crunchyroll:playlist**: [*crunchyroll*](## "netrc machine")
 - **CSpan**: C-SPAN
 - **CSpanCongress**
 - **CtsNews**: 華視新聞
 - **CTV**
 - **CTVNews**
 - **cu.ntv.co.jp**: Nippon Television Network
 - **CultureUnplugged**
 - **curiositystream**: [*curiositystream*](## "netrc machine")
 - **curiositystream:collections**: [*curiositystream*](## "netrc machine")
 - **curiositystream:series**: [*curiositystream*](## "netrc machine")
 - **CWTV**
 - **Cybrary**: [*cybrary*](## "netrc machine")
 - **CybraryCourse**: [*cybrary*](## "netrc machine")
 - **DacastPlaylist**
 - **DacastVOD**
 - **DagelijkseKost**: dagelijksekost.een.be
 - **DailyMail**
 - **dailymotion**: [*dailymotion*](## "netrc machine")
 - **dailymotion:playlist**: [*dailymotion*](## "netrc machine")
 - **dailymotion:search**: [*dailymotion*](## "netrc machine")
 - **dailymotion:user**: [*dailymotion*](## "netrc machine")
 - **DailyWire**
 - **DailyWirePodcast**
 - **damtomo:record**
 - **damtomo:video**
 - **dangalplay**: [*dangalplay*](## "netrc machine")
 - **dangalplay:season**: [*dangalplay*](## "netrc machine")
 - **daum.net**
 - **daum.net:clip**
 - **daum.net:playlist**
 - **daum.net:user**
 - **daystar:clip**
 - **DBTV**
 - **DctpTv**
 - **DeezerAlbum**
 - **DeezerPlaylist**
 - **democracynow**
 - **DestinationAmerica**
 - **DetikEmbed**
 - **DeuxM**
 - **DeuxMNews**
 - **DHM**: Filmarchiv - Deutsches Historisches Museum (**Currently broken**)
 - **DigitalConcertHall**: [*digitalconcerthall*](## "netrc machine") DigitalConcertHall extractor
"netrc machine") DigitalConcertHall extractor - **DigitallySpeaking** - **Digiteka** - **DiscogsReleasePlaylist** - **DiscoveryLife** - **DiscoveryNetworksDe** - **DiscoveryPlus** - **DiscoveryPlusIndia** - **DiscoveryPlusIndiaShow** - **DiscoveryPlusItaly** - **DiscoveryPlusItalyShow** - **Disney** - **dlf** - **dlf:corpus**: DLF Multi-feed Archives - **dlive:stream** - **dlive:vod** - **Douyin** - **DouyuShow** - **DouyuTV**: 斗鱼直播 - **DPlay** - **DRBonanza** - **Drooble** - **Dropbox** - **Dropout**: [*dropout*](## "netrc machine") - **DropoutSeason** - **DrTuber** - **drtv** - **drtv:live** - **drtv:season** - **drtv:series** - **DTube**: (**Currently broken**) - **duboku**: www.duboku.io - **duboku:list**: www.duboku.io entire series - **Dumpert** - **Duoplay** - **dvtv**: http://video.aktualne.cz/ - **dw**: (**Currently broken**) - **dw:article**: (**Currently broken**) - **EaglePlatform** - **EbaumsWorld** - **Ebay** - **egghead:course**: egghead.io course - **egghead:lesson**: egghead.io lesson - **EinsUndEinsTV**: [*1und1tv*](## "netrc machine") - **EinsUndEinsTVLive**: [*1und1tv*](## "netrc machine") - **EinsUndEinsTVRecordings**: [*1und1tv*](## "netrc machine") - **eitb.tv** - **ElementorEmbed** - **Elonet** - **ElPais**: El País - **ElTreceTV**: El Trece TV (Argentina) - **Embedly** - **EMPFlix** - **Epicon** - **EpiconSeries** - **EpidemicSound** - **eplus**: [*eplus*](## "netrc machine") e+ (イープラス) - **Epoch** - **Eporner** - **Erocast** - **EroProfile**: [*eroprofile*](## "netrc machine") - **EroProfile:album** - **ERRJupiter** - **ertflix**: ERTFLIX videos - **ertflix:codename**: ERTFLIX videos by codename - **ertwebtv:embed**: ert.gr webtv embedded videos - **ESPN** - **ESPNArticle** - **ESPNCricInfo** - **EttuTv** - **Europa**: (**Currently broken**) - **EuroParlWebstream** - **EuropeanTour** - **Eurosport** - **EUScreen** - **EWETV**: [*ewetv*](## "netrc machine") - **EWETVLive**: [*ewetv*](## "netrc machine") - **EWETVRecordings**: [*ewetv*](## "netrc machine") - **Expressen** - **EyedoTV** - **facebook**: [*facebook*](## "netrc machine") - **facebook:ads** - **facebook:reel** - **FacebookPluginsVideo** - **fancode:live**: [*fancode*](## "netrc machine") (**Currently broken**) - **fancode:vod**: [*fancode*](## "netrc machine") (**Currently broken**) - **Fathom** - **faz.net** - **fc2**: [*fc2*](## "netrc machine") - **fc2:embed** - **fc2:live** - **Fczenit** - **Fifa** - **filmon** - **filmon:channel** - **Filmweb** - **FiveThirtyEight** - **FiveTV** - **FlexTV** - **Flickr** - **Floatplane** - **FloatplaneChannel** - **Folketinget**: Folketinget (ft.dk; Danish parliament) - **FoodNetwork** - **FootyRoom** - **Formula1** - **FOX** - **FOX9** - **FOX9News** - **foxnews**: Fox News and Fox Business Video - **foxnews:article** - **FoxNewsVideo** - **FoxSports** - **fptplay**: fptplay.vn - **FranceCulture** - **FranceInter** - **FranceTV** - **francetvinfo.fr** - **FranceTVSite** - **Freesound** - **freespeech.org** - **freetv:series** - **FreeTvMovies** - **FrontendMasters**: [*frontendmasters*](## "netrc machine") - **FrontendMastersCourse**: [*frontendmasters*](## "netrc machine") - **FrontendMastersLesson**: [*frontendmasters*](## "netrc machine") - **FujiTVFODPlus7** - **Funimation**: [*funimation*](## "netrc machine") - **funimation:page**: [*funimation*](## "netrc machine") - **funimation:show**: [*funimation*](## "netrc machine") - **Funk** - **Funker530** - **Fux** - **FuyinTV** - **Gab** - **GabTV** - **Gaia**: [*gaia*](## "netrc machine") - **GameJolt** - 
 - **GameJoltCommunity**
 - **GameJoltGame**
 - **GameJoltGameSoundtrack**
 - **GameJoltSearch**
 - **GameJoltUser**
 - **GameSpot**
 - **GameStar**
 - **Gaskrank**
 - **Gazeta**: (**Currently broken**)
 - **GBNews**: GB News clips, features and live streams
 - **GDCVault**: [*gdcvault*](## "netrc machine") (**Currently broken**)
 - **GediDigital**
 - **gem.cbc.ca**: [*cbcgem*](## "netrc machine")
 - **gem.cbc.ca:live**
 - **gem.cbc.ca:playlist**
 - **Genius**
 - **GeniusLyrics**
 - **Germanupa**: germanupa.de
 - **GetCourseRu**: [*getcourseru*](## "netrc machine")
 - **GetCourseRuPlayer**
 - **Gettr**
 - **GettrStreaming**
 - **GiantBomb**
 - **GlattvisionTV**: [*glattvisiontv*](## "netrc machine")
 - **GlattvisionTVLive**: [*glattvisiontv*](## "netrc machine")
 - **GlattvisionTVRecordings**: [*glattvisiontv*](## "netrc machine")
 - **Glide**: Glide mobile video messages (glide.me)
 - **GlobalPlayerAudio**
 - **GlobalPlayerAudioEpisode**
 - **GlobalPlayerLive**
 - **GlobalPlayerLivePlaylist**
 - **GlobalPlayerVideo**
 - **Globo**: [*globo*](## "netrc machine")
 - **GloboArticle**
 - **glomex**: Glomex videos
 - **glomex:embed**: Glomex embedded videos
 - **GMANetworkVideo**
 - **Go**
 - **GoDiscovery**
 - **GodResource**
 - **GodTube**: (**Currently broken**)
 - **Gofile**
 - **Golem**
 - **goodgame:stream**
 - **google:podcasts**
 - **google:​podcasts:feed**
 - **GoogleDrive**
 - **GoogleDrive:Folder**
 - **GoPlay**: [*goplay*](## "netrc machine")
 - **GoPro**
 - **Goshgay**
 - **GoToStage**
 - **GPUTechConf**
 - **Graspop**
 - **Gronkh**
 - **gronkh:feed**
 - **gronkh:vods**
 - **Groupon**
 - **Harpodeon**
 - **hbo**
 - **HearThisAt**
 - **Heise**
 - **HellPorno**
 - **hetklokhuis**
 - **hgtv.com:show**
 - **HGTVDe**
 - **HGTVUsa**
 - **HiDive**: [*hidive*](## "netrc machine")
 - **HistoricFilms**
 - **history:player**
 - **history:topic**: History.com Topic
 - **HitRecord**
 - **hketv**: 香港教育局教育電視 (HKETV) Educational Television, Hong Kong Educational Bureau
 - **HollywoodReporter**
 - **HollywoodReporterPlaylist**
 - **Holodex**
 - **HotNewHipHop**: (**Currently broken**)
 - **hotstar**
 - **hotstar:playlist**
 - **hotstar:season**
 - **hotstar:series**
 - **hrfernsehen**
 - **HRTi**: [*hrti*](## "netrc machine")
 - **HRTiPlaylist**: [*hrti*](## "netrc machine")
 - **HSEProduct**
 - **HSEShow**
 - **html5**
 - **Huajiao**: 花椒直播
 - **HuffPost**: Huffington Post
 - **Hungama**
 - **HungamaAlbumPlaylist**
 - **HungamaSong**
 - **huya:live**: huya.com
 - **huya:video**: 虎牙视频
 - **Hypem**
 - **Hytale**
 - **Icareus**
 - **IdolPlus**
 - **iflix:episode**
 - **IflixSeries**
 - **ign.com**
 - **IGNArticle**
 - **IGNVideo**
 - **iheartradio**
 - **iheartradio:podcast**
 - **IlPost**
 - **Iltalehti**
 - **imdb**: Internet Movie Database trailers
 - **imdb:list**: Internet Movie Database lists
 - **Imgur**
 - **imgur:album**
 - **imgur:gallery**
 - **Ina**
 - **Inc**
 - **IndavideoEmbed**
 - **InfoQ**
 - **Instagram**: [*instagram*](## "netrc machine")
 - **instagram:story**: [*instagram*](## "netrc machine")
 - **instagram:tag**: [*instagram*](## "netrc machine") Instagram hashtag search URLs
 - **instagram:user**: [*instagram*](## "netrc machine") Instagram user profile (**Currently broken**)
 - **InstagramIOS**: IOS instagram:// URL
 - **Internazionale**
 - **InternetVideoArchive**
 - **InvestigationDiscovery**
 - **IPrima**: [*iprima*](## "netrc machine")
 - **IPrimaCNN**
 - **iq.com**: International version of iQiyi
 - **iq.com:album**
 - **iqiyi**: [*iqiyi*](## "netrc machine") 爱奇艺
 - **IslamChannel**
 - **IslamChannelSeries**
 - **IsraelNationalNews**
 - **ITProTV**
 - **ITProTVCourse**
 - **ITV**
 - **ITVBTCC**
 - **ivi**: ivi.ru
 - **ivi:compilation**: ivi.ru compilations
 - **ivideon**: Ivideon TV
 - **IVXPlayer**
 - **iwara**: [*iwara*](## "netrc machine")
 - **iwara:playlist**: [*iwara*](## "netrc machine")
 - **iwara:user**: [*iwara*](## "netrc machine")
 - **Ixigua**
 - **Izlesene**
 - **Jamendo**
 - **JamendoAlbum**
 - **JeuxVideo**: (**Currently broken**)
 - **jiocinema**: [*jiocinema*](## "netrc machine")
 - **jiocinema:series**: [*jiocinema*](## "netrc machine")
 - **jiosaavn:album**
 - **jiosaavn:playlist**
 - **jiosaavn:song**
 - **Joj**
 - **JoqrAg**: 超!A&G+ 文化放送 (f.k.a. AGQR) Nippon Cultural Broadcasting, Inc. (JOQR)
 - **Jove**
 - **JStream**
 - **JTBC**: jtbc.co.kr
 - **JTBC:program**
 - **JWPlatform**
 - **Kakao**
 - **Kaltura**
 - **KankaNews**: (**Currently broken**)
 - **Karaoketv**
 - **Katsomo**: (**Currently broken**)
 - **KelbyOne**: (**Currently broken**)
 - **Ketnet**
 - **khanacademy**
 - **khanacademy:unit**
 - **kick:clips**
 - **kick:live**
 - **kick:vod**
 - **Kicker**
 - **KickStarter**
 - **Kika**: KiKA.de
 - **kinja:embed**
 - **KinoPoisk**
 - **Kommunetv**
 - **KompasVideo**
 - **Koo**: (**Currently broken**)
 - **KrasView**: Красвью (**Currently broken**)
 - **KTH**
 - **Ku6**
 - **KukuluLive**
 - **kuwo:album**: 酷我音乐 - 专辑 (**Currently broken**)
 - **kuwo:category**: 酷我音乐 - 分类 (**Currently broken**)
 - **kuwo:chart**: 酷我音乐 - 排行榜 (**Currently broken**)
 - **kuwo:mv**: 酷我音乐 - MV (**Currently broken**)
 - **kuwo:singer**: 酷我音乐 - 歌手 (**Currently broken**)
 - **kuwo:song**: 酷我音乐 (**Currently broken**)
 - **la7.it**
 - **la7.it:​pod:episode**
 - **la7.it:podcast**
 - **laracasts**
 - **laracasts:series**
 - **LastFM**
 - **LastFMPlaylist**
 - **LastFMUser**
 - **LaXarxaMes**: [*laxarxames*](## "netrc machine")
 - **lbry**
 - **lbry:channel**
 - **lbry:playlist**
 - **LCI**
 - **Lcp**
 - **LcpPlay**
 - **Le**: 乐视网
 - **LearningOnScreen**
 - **Lecture2Go**: (**Currently broken**)
 - **Lecturio**: [*lecturio*](## "netrc machine")
 - **LecturioCourse**: [*lecturio*](## "netrc machine")
 - **LecturioDeCourse**: [*lecturio*](## "netrc machine")
 - **LeFigaroVideoEmbed**
 - **LeFigaroVideoSection**
 - **LEGO**
 - **Lemonde**
 - **Lenta**: (**Currently broken**)
 - **LePlaylist**
 - **LetvCloud**: 乐视云
 - **Libsyn**
 - **life**: Life.ru
 - **life:embed**
 - **likee**
 - **likee:user**
 - **limelight**
 - **limelight:channel**
 - **limelight:channel_list**
 - **LinkedIn**: [*linkedin*](## "netrc machine")
 - **linkedin:learning**: [*linkedin*](## "netrc machine")
 - **linkedin:​learning:course**: [*linkedin*](## "netrc machine")
 - **Liputan6**
 - **ListenNotes**
 - **LiTV**
 - **LiveJournal**
 - **livestream**
 - **livestream:original**
 - **Livestreamfails**
 - **Lnk**
 - **loc**: Library of Congress
 - **loom**
 - **loom:folder**
 - **LoveHomePorn**
 - **LRTStream**
 - **LRTVOD**
 - **LSMLREmbed**
 - **LSMLTVEmbed**
 - **LSMReplay**
 - **Lumni**
 - **lynda**: [*lynda*](## "netrc machine") lynda.com videos
 - **lynda:course**: [*lynda*](## "netrc machine") lynda.com online courses
 - **maariv.co.il**
 - **MagellanTV**
 - **MagentaMusik**
 - **mailru**: Видео@Mail.Ru
 - **mailru:music**: Музыка@Mail.Ru
 - **mailru:​music:search**: Музыка@Mail.Ru
 - **MainStreaming**: MainStreaming Player
 - **mangomolo:live**
 - **mangomolo:video**
 - **MangoTV**: 芒果TV
 - **ManotoTV**: Manoto TV (Episode)
 - **ManotoTVLive**: Manoto TV (Live)
 - **ManotoTVShow**: Manoto TV (Show)
 - **ManyVids**: (**Currently broken**)
 - **MaoriTV**
 - **Markiza**: (**Currently broken**)
 - **MarkizaPage**: (**Currently broken**)
 - **massengeschmack.tv**
 - **Masters**
 - **MatchTV**
 - **MBN**: mbn.co.kr (매일방송)
 - **MDR**: MDR.DE
 - **MedalTV**
 - **media.ccc.de**
 - **media.ccc.de:lists**
 - **Mediaite**
 - **MediaKlikk**
 - **Medialaan**
 - **Mediaset**
 - **MediasetShow**
 - **Mediasite**
 - **MediasiteCatalog**
 - **MediasiteNamedCatalog**
 - **MediaStream**
 - **MediaWorksNZVOD**
 - **Medici**
 - **megaphone.fm**: megaphone.fm embedded players
 - **megatvcom**: megatv.com videos
 - **megatvcom:embed**: megatv.com embedded videos
 - **Meipai**: 美拍
 - **MelonVOD**
 - **Metacritic**
 - **mewatch**
 - **MicrosoftBuild**
 - **MicrosoftEmbed**
 - **MicrosoftLearnEpisode**
 - **MicrosoftLearnPlaylist**
 - **MicrosoftLearnSession**
 - **MicrosoftMedius**
 - **microsoftstream**: Microsoft Stream
 - **mildom**: Record ongoing live by specific user in Mildom
 - **mildom:clip**: Clip in Mildom
 - **mildom:​user:vod**: Download all VODs from specific user in Mildom
 - **mildom:vod**: VOD in Mildom
 - **minds**
 - **minds:channel**
 - **minds:group**
 - **Minoto**
 - **mirrativ**
 - **mirrativ:user**
 - **MirrorCoUK**
 - **MiTele**: mitele.es
 - **mixch**
 - **mixch:archive**
 - **mixcloud**
 - **mixcloud:playlist**
 - **mixcloud:user**
 - **MLB**
 - **MLBArticle**
 - **MLBTV**: [*mlb*](## "netrc machine")
 - **MLBVideo**
 - **MLSSoccer**
 - **MNetTV**: [*mnettv*](## "netrc machine")
 - **MNetTVLive**: [*mnettv*](## "netrc machine")
 - **MNetTVRecordings**: [*mnettv*](## "netrc machine")
 - **MochaVideo**
 - **Mojevideo**: mojevideo.sk
 - **Mojvideo**
 - **Monstercat**
 - **MonsterSirenHypergryphMusic**
 - **Motherless**
 - **MotherlessGallery**
 - **MotherlessGroup**
 - **MotherlessUploader**
 - **Motorsport**: motorsport.com (**Currently broken**)
 - **MovieFap**
 - **Moviepilot**
 - **MoviewPlay**
 - **Moviezine**
 - **MovingImage**
 - **MSN**: (**Currently broken**)
 - **mtg**: MTG services
 - **mtv**
 - **mtv.de**: (**Currently broken**)
 - **mtv.it**
 - **mtv.it:programma**
 - **mtv:video**
 - **mtvjapan**
 - **mtvservices:embedded**
 - **MTVUutisetArticle**: (**Currently broken**)
 - **MuenchenTV**: münchen.tv (**Currently broken**)
 - **MujRozhlas**
 - **Murrtube**
 - **MurrtubeUser**: Murrtube user profile (**Currently broken**)
 - **MuseAI**
 - **MuseScore**
 - **MusicdexAlbum**
 - **MusicdexArtist**
 - **MusicdexPlaylist**
 - **MusicdexSong**
 - **Mx3**
 - **Mx3Neo**
 - **Mx3Volksmusik**
 - **Mxplayer**
 - **MxplayerShow**
 - **MySpace**
 - **MySpace:album**
 - **MySpass**
 - **MyVideoGe**
 - **MyVidster**
 - **Mzaalo**
 - **n-tv.de**
 - **N1Info:article**
 - **N1InfoAsset**
 - **Nate**
 - **NateProgram**
 - **natgeo:video**
 - **NationalGeographicTV**
 - **Naver**
 - **Naver:live**
 - **navernow**
 - **nba**
 - **nba:channel**
 - **nba:embed**
 - **nba:watch**
 - **nba:​watch:collection**
 - **nba:​watch:embed**
 - **NBC**
 - **NBCNews**
 - **nbcolympics**
 - **nbcolympics:stream**
 - **NBCSports**
 - **NBCSportsStream**
 - **NBCSportsVPlayer**
 - **NBCStations**
 - **ndr**: NDR.de - Norddeutscher Rundfunk
 - **ndr:embed**
 - **ndr:​embed:base**
 - **NDTV**: (**Currently broken**)
 - **nebula:channel**: [*watchnebula*](## "netrc machine")
 - **nebula:media**: [*watchnebula*](## "netrc machine")
 - **nebula:subscriptions**: [*watchnebula*](## "netrc machine")
 - **nebula:video**: [*watchnebula*](## "netrc machine")
 - **NekoHacker**
 - **NerdCubedFeed**
 - **netease:album**: 网易云音乐 - 专辑
 - **netease:djradio**: 网易云音乐 - 电台
 - **netease:mv**: 网易云音乐 - MV
 - **netease:playlist**: 网易云音乐 - 歌单
 - **netease:program**: 网易云音乐 - 电台节目
 - **netease:singer**: 网易云音乐 - 歌手
 - **netease:song**: 网易云音乐
 - **NetPlusTV**: [*netplus*](## "netrc machine")
 - **NetPlusTVLive**: [*netplus*](## "netrc machine")
 - **NetPlusTVRecordings**: [*netplus*](## "netrc machine")
 - **Netverse**
 - **NetversePlaylist**
 - **NetverseSearch**: "netsearch:" prefix
 - **Netzkino**: (**Currently broken**)
 - **Newgrounds**: [*newgrounds*](## "netrc machine")
 - **Newgrounds:playlist**
 - **Newgrounds:user**
 - **NewsPicks**
 - **Newsy**
 - **NextMedia**: 蘋果日報
 - **NextMediaActionNews**: 蘋果日報 - 動新聞
 - **NextTV**: 壹電視 (**Currently broken**)
 - **Nexx**
 - **NexxEmbed**
 - **nfb**: nfb.ca and onf.ca films and episodes
 - **nfb:series**: nfb.ca and onf.ca series
 - **NFHSNetwork**
 - **nfl.com**
 - **nfl.com:article**
 - **nfl.com:​plus:episode**
 - **nfl.com:​plus:replay**
 - **NhkForSchoolBangumi**
 - **NhkForSchoolProgramList**
 - **NhkForSchoolSubject**: Portal page for each school subjects, like Japanese (kokugo, 国語) or math (sansuu/suugaku or 算数・数学)
 - **NhkRadioNewsPage**
 - **NhkRadiru**: NHK らじる (Radiru/Rajiru)
 - **NhkRadiruLive**
 - **NhkVod**
 - **NhkVodProgram**
 - **nhl.com**
 - **nick.com**
 - **nick.de**
 - **nickelodeon:br**
 - **nickelodeonru**
 - **niconico**: [*niconico*](## "netrc machine") ニコニコ動画
 - **niconico:history**: NicoNico user history or likes. Requires cookies.
 - **niconico:live**: ニコニコ生放送
 - **niconico:playlist**
 - **niconico:series**
 - **niconico:tag**: NicoNico video tag URLs
 - **NiconicoChannelPlus**: ニコニコチャンネルプラス
 - **NiconicoChannelPlus:​channel:lives**: ニコニコチャンネルプラス - チャンネル - ライブリスト. nicochannel.jp/channel/lives
 - **NiconicoChannelPlus:​channel:videos**: ニコニコチャンネルプラス - チャンネル - 動画リスト. nicochannel.jp/channel/videos
 - **NiconicoUser**
 - **nicovideo:search**: Nico video search; "nicosearch:" prefix
 - **nicovideo:​search:date**: Nico video search, newest first; "nicosearchdate:" prefix
 - **nicovideo:search_url**: Nico video search URLs
 - **NinaProtocol**
 - **Nintendo**
 - **Nitter**
 - **njoy**: N-JOY
 - **njoy:embed**
 - **NobelPrize**: (**Currently broken**)
 - **NoicePodcast**
 - **NonkTube**
 - **NoodleMagazine**
 - **Noovo**
 - **NOSNLArticle**
 - **Nova**: TN.cz, Prásk.tv, Nova.cz, Novaplus.cz, FANDA.tv, Krásná.cz and Doma.cz
 - **NovaEmbed**
 - **NovaPlay**
 - **nowness**
 - **nowness:playlist**
 - **nowness:series**
 - **Noz**: (**Currently broken**)
 - **npo**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl
 - **npo.nl:live**
 - **npo.nl:radio**
 - **npo.nl:​radio:fragment**
 - **Npr**
 - **NRK**
 - **NRKPlaylist**
 - **NRKRadioPodkast**
 - **NRKSkole**: NRK Skole
 - **NRKTV**: NRK TV and NRK Radio
 - **NRKTVDirekte**: NRK TV Direkte and NRK Radio Direkte
 - **NRKTVEpisode**
 - **NRKTVEpisodes**
 - **NRKTVSeason**
 - **NRKTVSeries**
 - **NRLTV**: (**Currently broken**)
 - **nts.live**
 - **ntv.ru**
 - **NubilesPorn**: [*nubiles-porn*](## "netrc machine")
 - **nuum:live**
 - **nuum:media**
 - **nuum:tab**
 - **Nuvid**
 - **NYTimes**
 - **NYTimesArticle**
 - **NYTimesCookingGuide**
 - **NYTimesCookingRecipe**
 - **nzherald**
 - **NZOnScreen**
 - **NZZ**
 - **ocw.mit.edu**
 - **Odnoklassniki**
 - **OfTV**
 - **OfTVPlaylist**
 - **OktoberfestTV**
 - **OlympicsReplay**
 - **on24**: ON24
 - **OnDemandChinaEpisode**
 - **OnDemandKorea**
 - **OnDemandKoreaProgram**
 - **OneFootball**
 - **OnePlacePodcast**
 - **onet.pl**
 - **onet.tv**
 - **onet.tv:channel**
 - **OnetMVP**
 - **OnionStudios**
 - **Opencast**
 - **OpencastPlaylist**
 - **openrec**
 - **openrec:capture**
 - **openrec:movie**
 - **OraTV**
 - **orf:​fm4:story**: fm4.orf.at stories
 - **orf:iptv**: iptv.ORF.at
 - **orf:on**
 - **orf:podcast**
 - **orf:radio**
 - **OsnatelTV**: [*osnateltv*](## "netrc machine")
 - **OsnatelTVLive**: [*osnateltv*](## "netrc machine")
 - **OsnatelTVRecordings**: [*osnateltv*](## "netrc machine")
 - **OutsideTV**
 - **OwnCloud**
 - **PacktPub**: [*packtpub*](## "netrc machine")
 - **PacktPubCourse**
 - **PalcoMP3:artist**
 - **PalcoMP3:song**
 - **PalcoMP3:video**
 - **Panopto**
 - **PanoptoList**
 - **PanoptoPlaylist**
 - **ParamountNetwork**
 - **ParamountPlus**
 - **ParamountPlusSeries**
 - **ParamountPressExpress**
 - **Parler**: Posts on parler.com
 - **parliamentlive.tv**: UK parliament videos
 - **Parlview**: (**Currently broken**)
 - **Patreon**
 - **PatreonCampaign**
 - **pbs**: Public Broadcasting Service (PBS) and member stations: PBS: Public Broadcasting Service, APT - Alabama Public Television (WBIQ), GPB/Georgia Public Broadcasting (WGTV), Mississippi Public Broadcasting (WMPN), Nashville Public Television (WNPT), WFSU-TV (WFSU), WSRE (WSRE), WTCI (WTCI), WPBA/Channel 30 (WPBA), Alaska Public Media (KAKM), Arizona PBS (KAET), KNME-TV/Channel 5 (KNME), Vegas PBS (KLVX), AETN/ARKANSAS ETV NETWORK (KETS), KET (WKLE), WKNO/Channel 10 (WKNO), LPB/LOUISIANA PUBLIC BROADCASTING (WLPB), OETA (KETA), Ozarks Public Television (KOZK), WSIU Public Broadcasting (WSIU), KEET TV (KEET), KIXE/Channel 9 (KIXE), KPBS San Diego (KPBS), KQED (KQED), KVIE Public Television (KVIE), PBS SoCal/KOCE (KOCE), ValleyPBS (KVPT), CONNECTICUT PUBLIC TELEVISION (WEDH), KNPB Channel 5 (KNPB), SOPTV (KSYS), Rocky Mountain PBS (KRMA), KENW-TV3 (KENW), KUED Channel 7 (KUED), Wyoming PBS (KCWC), Colorado Public Television / KBDI 12 (KBDI), KBYU-TV (KBYU), Thirteen/WNET New York (WNET), WGBH/Channel 2 (WGBH), WGBY (WGBY), NJTV Public Media NJ (WNJT), WLIW21 (WLIW), mpt/Maryland Public Television (WMPB), WETA Television and Radio (WETA), WHYY (WHYY), PBS 39 (WLVT), WVPT - Your Source for PBS and More! (WVPT), Howard University Television (WHUT), WEDU PBS (WEDU), WGCU Public Media (WGCU), WPBT2 (WPBT), WUCF TV (WUCF), WUFT/Channel 5 (WUFT), WXEL/Channel 42 (WXEL), WLRN/Channel 17 (WLRN), WUSF Public Broadcasting (WUSF), ETV (WRLK), UNC-TV (WUNC), PBS Hawaii - Oceanic Cable Channel 10 (KHET), Idaho Public Television (KAID), KSPS (KSPS), OPB (KOPB), KWSU/Channel 10 & KTNW/Channel 31 (KWSU), WILL-TV (WILL), Network Knowledge - WSEC/Springfield (WSEC), WTTW11 (WTTW), Iowa Public Television/IPTV (KDIN), Nine Network (KETC), PBS39 Fort Wayne (WFWA), WFYI Indianapolis (WFYI), Milwaukee Public Television (WMVS), WNIN (WNIN), WNIT Public Television (WNIT), WPT (WPNE), WVUT/Channel 22 (WVUT), WEIU/Channel 51 (WEIU), WQPT-TV (WQPT), WYCC PBS Chicago (WYCC), WIPB-TV (WIPB), WTIU (WTIU), CET (WCET), ThinkTVNetwork (WPTD), WBGU-TV (WBGU), WGVU TV (WGVU), NET1 (KUON), Pioneer Public Television (KWCM), SDPB Television (KUSD), TPT (KTCA), KSMQ (KSMQ), KPTS/Channel 8 (KPTS), KTWU/Channel 11 (KTWU), East Tennessee PBS (WSJK), WCTE-TV (WCTE), WLJT, Channel 11 (WLJT), WOSU TV (WOSU), WOUB/WOUC (WOUB), WVPB (WVPB), WKYU-PBS (WKYU), KERA 13 (KERA), MPBN (WCBB), Mountain Lake PBS (WCFE), NHPTV (WENH), Vermont PBS (WETK), witf (WITF), WQED Multimedia (WQED), WMHT Educational Telecommunications (WMHT), Q-TV (WDCQ), WTVS Detroit Public TV (WTVS), CMU Public Television (WCMU), WKAR-TV (WKAR), WNMU-TV Public TV 13 (WNMU), WDSE - WRPT (WDSE), WGTE TV (WGTE), Lakeland Public Television (KAWE), KMOS-TV - Channels 6.1, 6.2 and 6.3 (KMOS), MontanaPBS (KUSM), KRWG/Channel 22 (KRWG), KACV (KACV), KCOS/Channel 13 (KCOS), WCNY/Channel 24 (WCNY), WNED (WNED), WPBS (WPBS), WSKG Public TV (WSKG), WXXI (WXXI), WPSU (WPSU), WVIA Public Media Studios (WVIA), WTVI (WTVI), Western Reserve PBS (WNEO), WVIZ/PBS ideastream (WVIZ), KCTS 9 (KCTS), Basin PBS (KPBT), KUHT / Channel 8 (KUHT), KLRN (KLRN), KLRU (KLRU), WTJX Channel 12 (WTJX), WCVE PBS (WCVE), KBTC Public Television (KBTC)
KBTC Public Television (KBTC) - **PBSKids** - **PearVideo** - **PeekVids** - **peer.tv** - **PeerTube** - **PeerTube:Playlist** - **peloton**: [*peloton*](## "netrc machine") - **peloton:live**: Peloton Live - **PerformGroup** - **periscope**: Periscope - **periscope:user**: Periscope user videos - **PGATour** - **PhilharmonieDeParis**: Philharmonie de Paris - **phoenix.de** - **Photobucket** - **Piapro**: [*piapro*](## "netrc machine") - **PIAULIZAPortal**: ulizaportal.jp - PIA LIVE STREAM - **Picarto** - **PicartoVod** - **Piksel** - **Pinkbike** - **Pinterest** - **PinterestCollection** - **pixiv:sketch** - **pixiv:​sketch:user** - **Pladform** - **PlanetMarathi** - **Platzi**: [*platzi*](## "netrc machine") - **PlatziCourse**: [*platzi*](## "netrc machine") - **player.sky.it** - **playeur** - **PlayPlusTV**: [*playplustv*](## "netrc machine") - **PlaySuisse**: [*playsuisse*](## "netrc machine") - **Playtvak**: Playtvak.cz, iDNES.cz and Lidovky.cz - **PlayVids** - **Playwire** - **pluralsight**: [*pluralsight*](## "netrc machine") - **pluralsight:course** - **PlutoTV**: (**Currently broken**) - **PodbayFM** - **PodbayFMChannel** - **Podchaser** - **podomatic**: (**Currently broken**) - **Pokemon** - **PokemonWatch** - **PokerGo**: [*pokergo*](## "netrc machine") - **PokerGoCollection**: [*pokergo*](## "netrc machine") - **PolsatGo** - **PolskieRadio** - **polskieradio:audition** - **polskieradio:category** - **polskieradio:legacy** - **polskieradio:player** - **polskieradio:podcast** - **polskieradio:​podcast:list** - **Popcorntimes** - **PopcornTV** - **Pornbox** - **PornerBros** - **PornFlip** - **PornHub**: [*pornhub*](## "netrc machine") PornHub and Thumbzilla - **PornHubPagedVideoList**: [*pornhub*](## "netrc machine") - **PornHubPlaylist**: [*pornhub*](## "netrc machine") - **PornHubUser**: [*pornhub*](## "netrc machine") - **PornHubUserVideosUpload**: [*pornhub*](## "netrc machine") - **Pornotube** - **PornoVoisines**: (**Currently broken**) - **PornoXO**: (**Currently broken**) - **PornTop** - **PornTube** - **Pr0gramm** - **PrankCast** - **PrankCastPost** - **PremiershipRugby** - **PressTV** - **ProjectVeritas**: (**Currently broken**) - **prosiebensat1**: ProSiebenSat.1 Digital - **PRXAccount** - **PRXSeries** - **prxseries:search**: PRX Series Search; "prxseries:" prefix - **prxstories:search**: PRX Stories Search; "prxstories:" prefix - **PRXStory** - **puhutv** - **puhutv:serie** - **Puls4** - **Pyvideo** - **QDance**: [*qdance*](## "netrc machine") - **QingTing** - **qqmusic**: QQ音乐 - **qqmusic:album**: QQ音乐 - 专辑 - **qqmusic:mv**: QQ音乐 - MV - **qqmusic:playlist**: QQ音乐 - 歌单 - **qqmusic:singer**: QQ音乐 - 歌手 - **qqmusic:toplist**: QQ音乐 - 排行榜 - **QuantumTV**: [*quantumtv*](## "netrc machine") - **QuantumTVLive**: [*quantumtv*](## "netrc machine") - **QuantumTVRecordings**: [*quantumtv*](## "netrc machine") - **R7**: (**Currently broken**) - **R7Article**: (**Currently broken**) - **Radiko** - **RadikoRadio** - **radio.de**: (**Currently broken**) - **Radio1Be** - **radiocanada** - **radiocanada:audiovideo** - **RadioComercial** - **RadioComercialPlaylist** - **radiofrance** - **RadioFranceLive** - **RadioFrancePodcast** - **RadioFranceProfile** - **RadioFranceProgramSchedule** - **RadioJavan**: (**Currently broken**) - **radiokapital** - **radiokapital:show** - **RadioZetPodcast** - **radlive** - **radlive:channel** - **radlive:season** - **Rai** - **RaiCultura** - **RaiNews** - **RaiPlay** - **RaiPlayLive** - **RaiPlayPlaylist** - **RaiPlaySound** - **RaiPlaySoundLive** - 
**RaiPlaySoundPlaylist** - **RaiSudtirol** - **RayWenderlich** - **RayWenderlichCourse** - **RbgTum** - **RbgTumCourse** - **RbgTumNewCourse** - **RCS** - **RCSEmbeds** - **RCSVarious** - **RCTIPlus** - **RCTIPlusSeries** - **RCTIPlusTV** - **RDS**: RDS.ca (**Currently broken**) - **RedBull** - **RedBullEmbed** - **RedBullTV** - **RedBullTVRrnContent** - **redcdnlivx** - **Reddit**: [*reddit*](## "netrc machine") - **RedGifs** - **RedGifsSearch**: Redgifs search - **RedGifsUser**: Redgifs user - **RedTube** - **RENTV**: (**Currently broken**) - **RENTVArticle**: (**Currently broken**) - **Restudy**: (**Currently broken**) - **Reuters**: (**Currently broken**) - **ReverbNation** - **RheinMainTV** - **RideHome** - **RinseFM** - **RinseFMArtistPlaylist** - **RMCDecouverte** - **RockstarGames**: (**Currently broken**) - **Rokfin**: [*rokfin*](## "netrc machine") - **rokfin:channel**: Rokfin Channels - **rokfin:search**: Rokfin Search; "rkfnsearch:" prefix - **rokfin:stack**: Rokfin Stacks - **RoosterTeeth**: [*roosterteeth*](## "netrc machine") - **RoosterTeethSeries**: [*roosterteeth*](## "netrc machine") - **RottenTomatoes** - **Rozhlas** - **RozhlasVltava** - **RTBF**: [*rtbf*](## "netrc machine") (**Currently broken**) - **RTDocumentry** - **RTDocumentryPlaylist** - **rte**: Raidió Teilifís Éireann TV - **rte:radio**: Raidió Teilifís Éireann radio - **rtl.lu:article** - **rtl.lu:tele-vod** - **rtl.nl**: rtl.nl and rtlxl.nl - **rtl2** - **RTLLuLive** - **RTLLuRadio** - **RTNews** - **RTP** - **RTRFM** - **RTS**: RTS.ch (**Currently broken**) - **RTVCKaltura** - **RTVCPlay** - **RTVCPlayEmbed** - **rtve.es:alacarta**: RTVE a la carta - **rtve.es:audio**: RTVE audio - **rtve.es:infantil**: RTVE infantil - **rtve.es:live**: RTVE.es live streams - **rtve.es:television** - **RTVS** - **rtvslo.si** - **rtvslo.si:show** - **RudoVideo** - **Rule34Video** - **Rumble** - **RumbleChannel** - **RumbleEmbed** - **Ruptly** - **rutube**: Rutube videos - **rutube:channel**: Rutube channel - **rutube:embed**: Rutube embedded videos - **rutube:movie**: Rutube movies - **rutube:person**: Rutube person videos - **rutube:playlist**: Rutube playlists - **rutube:tags**: Rutube tags - **RUTV**: RUTV.RU - **Ruutu** - **Ruv** - **ruv.is:spila** - **S4C** - **S4CSeries** - **safari**: [*safari*](## "netrc machine") safaribooksonline.com online video - **safari:api**: [*safari*](## "netrc machine") - **safari:course**: [*safari*](## "netrc machine") safaribooksonline.com online courses - **Saitosan**: (**Currently broken**) - **SAKTV**: [*saktv*](## "netrc machine") - **SAKTVLive**: [*saktv*](## "netrc machine") - **SAKTVRecordings**: [*saktv*](## "netrc machine") - **SaltTV**: [*salttv*](## "netrc machine") - **SaltTVLive**: [*salttv*](## "netrc machine") - **SaltTVRecordings**: [*salttv*](## "netrc machine") - **SampleFocus** - **Sangiin**: 参議院インターネット審議中継 (archive) - **Sapo**: SAPO Vídeos - **SBS**: sbs.com.au - **sbs.co.kr** - **sbs.co.kr:allvod_program** - **sbs.co.kr:programs_vod** - **schooltv** - **ScienceChannel** - **screen.yahoo:search**: Yahoo screen search; "yvsearch:" prefix - **Screen9** - **Screencast** - **Screencastify** - **ScreencastOMatic** - **ScreenRec** - **ScrippsNetworks** - **scrippsnetworks:watch** - **Scrolller** - **SCTE**: [*scte*](## "netrc machine") (**Currently broken**) - **SCTECourse**: [*scte*](## "netrc machine") (**Currently broken**) - **sejm** - **Sen** - **SenalColombiaLive**: (**Currently broken**) - **SenateGov** - **SenateISVP** - **SendtoNews**: (**Currently broken**) - 
**Servus** - **Sexu**: (**Currently broken**) - **SeznamZpravy** - **SeznamZpravyArticle** - **Shahid**: [*shahid*](## "netrc machine") - **ShahidShow** - **SharePoint** - **ShareVideosEmbed** - **ShemarooMe** - **ShowRoomLive** - **ShugiinItvLive**: 衆議院インターネット審議中継 - **ShugiinItvLiveRoom**: 衆議院インターネット審議中継 (中継) - **ShugiinItvVod**: 衆議院インターネット審議中継 (ビデオライブラリ) - **SibnetEmbed** - **simplecast** - **simplecast:episode** - **simplecast:podcast** - **Sina** - **Skeb** - **sky.it** - **sky:news** - **sky:​news:story** - **sky:sports** - **sky:​sports:news** - **SkylineWebcams**: (**Currently broken**) - **skynewsarabia:article**: (**Currently broken**) - **skynewsarabia:video**: (**Currently broken**) - **SkyNewsAU** - **Slideshare** - **SlidesLive** - **Slutload** - **Smotrim** - **SnapchatSpotlight** - **Snotr** - **Sohu** - **SohuV** - **SonyLIV**: [*sonyliv*](## "netrc machine") - **SonyLIVSeries** - **soundcloud**: [*soundcloud*](## "netrc machine") - **soundcloud:playlist**: [*soundcloud*](## "netrc machine") - **soundcloud:related**: [*soundcloud*](## "netrc machine") - **soundcloud:search**: [*soundcloud*](## "netrc machine") Soundcloud search; "scsearch:" prefix - **soundcloud:set**: [*soundcloud*](## "netrc machine") - **soundcloud:trackstation**: [*soundcloud*](## "netrc machine") - **soundcloud:user**: [*soundcloud*](## "netrc machine") - **soundcloud:​user:permalink**: [*soundcloud*](## "netrc machine") - **SoundcloudEmbed** - **soundgasm** - **soundgasm:profile** - **southpark.cc.com** - **southpark.cc.com:español** - **southpark.de** - **southpark.lat** - **southpark.nl** - **southparkstudios.dk** - **SovietsCloset** - **SovietsClosetPlaylist** - **SpankBang** - **SpankBangPlaylist** - **Spiegel** - **Sport5** - **SportBox** - **SportDeutschland** - **spotify**: Spotify episodes (**Currently broken**) - **spotify:show**: Spotify shows (**Currently broken**) - **Spreaker** - **SpreakerPage** - **SpreakerShow** - **SpreakerShowPage** - **SpringboardPlatform** - **Sprout** - **SproutVideo** - **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**) - **SRGSSR** - **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites - **StacommuLive**: [*stacommu*](## "netrc machine") - **StacommuVOD**: [*stacommu*](## "netrc machine") - **StagePlusVODConcert**: [*stageplus*](## "netrc machine") - **stanfordoc**: Stanford Open ClassRoom - **StarTrek**: (**Currently broken**) - **startv** - **Steam** - **SteamCommunityBroadcast** - **Stitcher** - **StitcherShow** - **StoryFire** - **StoryFireSeries** - **StoryFireUser** - **Streamable** - **StreamCZ** - **StreetVoice** - **StretchInternet** - **Stripchat** - **stv:player** - **Substack** - **SunPorno** - **sverigesradio:episode** - **sverigesradio:publication** - **SVT** - **SVTPage** - **SVTPlay**: SVT Play and Öppet arkiv - **SVTSeries** - **SwearnetEpisode** - **Syfy**: (**Currently broken**) - **SYVDK** - **SztvHu** - **t-online.de**: (**Currently broken**) - **Tagesschau**: (**Currently broken**) - **TapTapApp** - **TapTapAppIntl** - **TapTapMoment** - **TapTapPostIntl** - **Tass**: (**Currently broken**) - **TBS** - **TBSJPEpisode** - **TBSJPPlaylist** - **TBSJPProgram** - **Teachable**: [*teachable*](## "netrc machine") (**Currently broken**) - **TeachableCourse**: [*teachable*](## "netrc machine") - **teachertube**: teachertube.com videos (**Currently broken**) - **teachertube:​user:collection**: teachertube.com user and collection videos (**Currently broken**) - **TeachingChannel**: (**Currently broken**) - 
**Teamcoco** - **TeamTreeHouse**: [*teamtreehouse*](## "netrc machine") - **techtv.mit.edu** - **TedEmbed** - **TedPlaylist** - **TedSeries** - **TedTalk** - **Tele13** - **Tele5** - **TeleBruxelles** - **TelecaribePlay** - **Telecinco**: telecinco.es, cuatro.com and mediaset.es - **Telegraaf** - **telegram:embed** - **TeleMB**: (**Currently broken**) - **Telemundo**: (**Currently broken**) - **TeleQuebec** - **TeleQuebecEmission** - **TeleQuebecLive** - **TeleQuebecSquat** - **TeleQuebecVideo** - **TeleTask**: (**Currently broken**) - **Telewebion** - **Tempo** - **TennisTV**: [*tennistv*](## "netrc machine") - **TenPlay**: [*10play*](## "netrc machine") - **TenPlaySeason** - **TF1** - **TFO** - **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine") - **theatercomplextown:vod**: [*theatercomplextown*](## "netrc machine") - **TheGuardianPodcast** - **TheGuardianPodcastPlaylist** - **TheHoleTv** - **TheIntercept** - **ThePlatform** - **ThePlatformFeed** - **TheStar** - **TheSun** - **TheWeatherChannel** - **ThisAmericanLife** - **ThisOldHouse**: [*thisoldhouse*](## "netrc machine") - **ThisVid** - **ThisVidMember** - **ThisVidPlaylist** - **ThreeSpeak** - **ThreeSpeakUser** - **TikTok** - **tiktok:collection** - **tiktok:effect**: (**Currently broken**) - **tiktok:live** - **tiktok:sound**: (**Currently broken**) - **tiktok:tag**: (**Currently broken**) - **tiktok:user** - **TLC** - **TMZ** - **TNAFlix** - **TNAFlixNetworkEmbed** - **toggle** - **toggo** - **tokfm:audition** - **tokfm:podcast** - **ToonGoggles** - **tou.tv**: [*toutv*](## "netrc machine") - **Toypics**: Toypics video (**Currently broken**) - **ToypicsUser**: Toypics user profile (**Currently broken**) - **TrailerAddict**: (**Currently broken**) - **TravelChannel** - **Triller**: [*triller*](## "netrc machine") - **TrillerShort** - **TrillerUser**: [*triller*](## "netrc machine") - **Trovo** - **TrovoChannelClip**: All Clips of a trovo.live channel; "trovoclip:" prefix - **TrovoChannelVod**: All VODs of a trovo.live channel; "trovovod:" prefix - **TrovoVod** - **TrtCocukVideo** - **TrtWorld** - **TrueID** - **TruNews** - **Truth** - **TruTV** - **Tube8**: (**Currently broken**) - **TubeTuGraz**: [*tubetugraz*](## "netrc machine") tube.tugraz.at - **TubeTuGrazSeries**: [*tubetugraz*](## "netrc machine") - **tubitv**: [*tubitv*](## "netrc machine") - **tubitv:series** - **Tumblr**: [*tumblr*](## "netrc machine") - **TuneInPodcast** - **TuneInPodcastEpisode** - **TuneInStation** - **tv.dfb.de** - **TV2** - **TV2Article** - **TV2DK** - **TV2DKBornholmPlay** - **tv2play.hu** - **tv2playseries.hu** - **TV4**: tv4.se and tv4play.se - **TV5MONDE** - **tv5unis** - **tv5unis:video** - **tv8.it** - **TVANouvelles** - **TVANouvellesArticle** - **tvaplus**: TVA+ - **TVC** - **TVCArticle** - **TVer** - **tvigle**: Интернет-телевидение Tvigle.ru - **TVIPlayer** - **tvland.com** - **TVN24**: (**Currently broken**) - **TVNoe**: (**Currently broken**) - **tvopengr:embed**: tvopen.gr embedded videos - **tvopengr:watch**: tvopen.gr (and ethnos.gr) videos - **tvp**: Telewizja Polska - **tvp:embed**: Telewizja Polska - **tvp:stream** - **tvp:vod** - **tvp:​vod:series** - **TVPlayer** - **TVPlayHome** - **Tweakers** - **TwitCasting** - **TwitCastingLive** - **TwitCastingUser** - **twitch:clips**: [*twitch*](## "netrc machine") - **twitch:stream**: [*twitch*](## "netrc machine") - **twitch:vod**: [*twitch*](## "netrc machine") - **TwitchCollection**: [*twitch*](## "netrc machine") - **TwitchVideos**: [*twitch*](## "netrc machine") - 
**TwitchVideosClips**: [*twitch*](## "netrc machine") - **TwitchVideosCollections**: [*twitch*](## "netrc machine") - **twitter**: [*twitter*](## "netrc machine") - **twitter:amplify**: [*twitter*](## "netrc machine") - **twitter:broadcast**: [*twitter*](## "netrc machine") - **twitter:card** - **twitter:shortener**: [*twitter*](## "netrc machine") - **twitter:spaces**: [*twitter*](## "netrc machine") - **Txxx** - **udemy**: [*udemy*](## "netrc machine") - **udemy:course**: [*udemy*](## "netrc machine") - **UDNEmbed**: 聯合影音 - **UFCArabia**: [*ufcarabia*](## "netrc machine") - **UFCTV**: [*ufctv*](## "netrc machine") - **ukcolumn**: (**Currently broken**) - **UKTVPlay** - **umg:de**: Universal Music Deutschland (**Currently broken**) - **Unistra** - **Unity**: (**Currently broken**) - **uol.com.br** - **uplynk** - **uplynk:preplay** - **Urort**: NRK P3 Urørt (**Currently broken**) - **URPlay** - **USANetwork** - **USAToday** - **ustream** - **ustream:channel** - **ustudio** - **ustudio:embed** - **Varzesh3**: (**Currently broken**) - **Vbox7** - **Veo** - **Veoh** - **veoh:user** - **Vesti**: Вести.Ru (**Currently broken**) - **Vevo** - **VevoPlaylist** - **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet - **vh1.com** - **vhx:embed**: [*vimeo*](## "netrc machine") - **vice** - **vice:article** - **vice:show** - **Viddler** - **Videa** - **video.arnes.si**: Arnes Video - **video.google:search**: Google Video search; "gvsearch:" prefix - **video.sky.it** - **video.sky.it:live** - **VideoDetective** - **videofy.me**: (**Currently broken**) - **VideoKen** - **VideoKenCategory** - **VideoKenPlayer** - **VideoKenPlaylist** - **VideoKenTopic** - **videomore** - **videomore:season** - **videomore:video** - **VideoPress** - **Vidflex** - **Vidio**: [*vidio*](## "netrc machine") - **VidioLive**: [*vidio*](## "netrc machine") - **VidioPremier**: [*vidio*](## "netrc machine") - **VidLii** - **Vidly** - **vids.io** - **Vidyard** - **viewlift** - **viewlift:embed** - **Viidea** - **viki**: [*viki*](## "netrc machine") - **viki:channel**: [*viki*](## "netrc machine") - **vimeo**: [*vimeo*](## "netrc machine") - **vimeo:album**: [*vimeo*](## "netrc machine") - **vimeo:channel**: [*vimeo*](## "netrc machine") - **vimeo:group**: [*vimeo*](## "netrc machine") - **vimeo:likes**: [*vimeo*](## "netrc machine") Vimeo user likes - **vimeo:ondemand**: [*vimeo*](## "netrc machine") - **vimeo:pro**: [*vimeo*](## "netrc machine") - **vimeo:review**: [*vimeo*](## "netrc machine") Review pages on vimeo - **vimeo:user**: [*vimeo*](## "netrc machine") - **vimeo:watchlater**: [*vimeo*](## "netrc machine") Vimeo watch later list, ":vimeowatchlater" keyword (requires authentication) - **Vimm:recording** - **Vimm:stream** - **ViMP** - **ViMP:Playlist** - **Vine** - **vine:user** - **Viously** - **Viqeo**: (**Currently broken**) - **Viu** - **viu:ott**: [*viu*](## "netrc machine") - **viu:playlist** - **ViuOTTIndonesia** - **vk**: [*vk*](## "netrc machine") VK - **vk:uservideos**: [*vk*](## "netrc machine") VK - User's Videos - **vk:wallpost**: [*vk*](## "netrc machine") - **VKPlay** - **VKPlayLive** - **vm.tiktok** - **Vocaroo** - **VODPl** - **VODPlatform** - **voicy**: (**Currently broken**) - **voicy:channel**: (**Currently broken**) - **VolejTV** - **VoxMedia** - **VoxMediaVolume** - **vpro**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **vqq:series** - **vqq:video** - **VRT**: VRT NWS, Flanders News, Flandern Info and Sporza - **VrtNU**: [*vrtnu*](## "netrc machine") VRT MAX - **VTM**: (**Currently broken**) 
- **VTV** - **VTVGo** - **VTXTV**: [*vtxtv*](## "netrc machine") - **VTXTVLive**: [*vtxtv*](## "netrc machine") - **VTXTVRecordings**: [*vtxtv*](## "netrc machine") - **VuClip** - **VVVVID** - **VVVVIDShow** - **Walla** - **WalyTV**: [*walytv*](## "netrc machine") - **WalyTVLive**: [*walytv*](## "netrc machine") - **WalyTVRecordings**: [*walytv*](## "netrc machine") - **washingtonpost** - **washingtonpost:article** - **wat.tv** - **WatchESPN** - **WDR** - **wdr:mobile**: (**Currently broken**) - **WDRElefant** - **WDRPage** - **web.archive:youtube**: web.archive.org saved youtube videos, "ytarchive:" prefix - **Webcamerapl** - **Webcaster** - **WebcasterFeed** - **WebOfStories** - **WebOfStoriesPlaylist** - **Weibo** - **WeiboUser** - **WeiboVideo** - **WeiqiTV**: WQTV (**Currently broken**) - **wetv:episode** - **WeTvSeries** - **Weverse**: [*weverse*](## "netrc machine") - **WeverseLive**: [*weverse*](## "netrc machine") - **WeverseLiveTab**: [*weverse*](## "netrc machine") - **WeverseMedia**: [*weverse*](## "netrc machine") - **WeverseMediaTab**: [*weverse*](## "netrc machine") - **WeverseMoment**: [*weverse*](## "netrc machine") - **WeVidi** - **Weyyak** - **whowatch** - **Whyp** - **wikimedia.org** - **Wimbledon** - **WimTV** - **WinSportsVideo** - **Wistia** - **WistiaChannel** - **WistiaPlaylist** - **wnl**: npo.nl, ntr.nl, omroepwnl.nl, zapp.nl and npo3.nl - **wordpress:mb.miniAudioPlayer** - **wordpress:playlist** - **WorldStarHipHop** - **wppilot** - **wppilot:channels** - **WrestleUniversePPV**: [*wrestleuniverse*](## "netrc machine") - **WrestleUniverseVOD**: [*wrestleuniverse*](## "netrc machine") - **WSJ**: Wall Street Journal - **WSJArticle** - **WWE** - **wyborcza:video** - **WyborczaPodcast** - **wykop:dig** - **wykop:​dig:comment** - **wykop:post** - **wykop:​post:comment** - **Xanimu** - **XboxClips** - **XHamster** - **XHamsterEmbed** - **XHamsterUser** - **XiaoHongShu**: 小红书 - **ximalaya**: 喜马拉雅FM - **ximalaya:album**: 喜马拉雅FM 专辑 - **Xinpianchang**: 新片场 - **XMinus**: (**Currently broken**) - **XNXX** - **Xstream** - **XVideos** - **xvideos:quickies** - **XXXYMovies** - **Yahoo**: Yahoo screen and movies - **yahoo:japannews**: Yahoo! Japan News - **YandexDisk** - **yandexmusic:album**: Яндекс.Музыка - Альбом - **yandexmusic:​artist:albums**: Яндекс.Музыка - Артист - Альбомы - **yandexmusic:​artist:tracks**: Яндекс.Музыка - Артист - Треки - **yandexmusic:playlist**: Яндекс.Музыка - Плейлист - **yandexmusic:track**: Яндекс.Музыка - Трек - **YandexVideo** - **YandexVideoPreview** - **YapFiles**: (**Currently broken**) - **Yappy**: (**Currently broken**) - **YappyProfile** - **YleAreena** - **YouJizz** - **youku**: 优酷 - **youku:show** - **YouNowChannel** - **YouNowLive** - **YouNowMoment** - **YouPorn** - **YouPornCategory**: YouPorn category, with sorting, filtering and pagination - **YouPornChannel**: YouPorn channel, with sorting and pagination - **YouPornCollection**: YouPorn collection (user playlist), with sorting and pagination - **YouPornStar**: YouPorn Pornstar, with description, sorting and pagination - **YouPornTag**: YouPorn tag (porntags), with sorting, filtering and pagination - **YouPornVideos**: YouPorn video (browse) playlists, with sorting, filtering and pagination - **youtube**: YouTube - **youtube:clip** - **youtube:favorites**: YouTube liked videos; ":ytfav" keyword (requires cookies) - **youtube:history**: Youtube watch history; ":ythis" keyword (requires cookies) - **youtube:​music:search_url**: YouTube music search URLs with selectable sections, e.g. 
- **youtube:notif**: YouTube notifications; ":ytnotif" keyword (requires cookies)
- **youtube:playlist**: YouTube playlists
- **youtube:recommended**: YouTube recommended videos; ":ytrec" keyword
- **youtube:search**: YouTube search; "ytsearch:" prefix
- **youtube:​search:date**: YouTube search, newest videos first; "ytsearchdate:" prefix
- **youtube:search_url**: YouTube search URLs with sorting and filter support
- **youtube:​shorts:pivot:audio**: YouTube Shorts audio pivot (Shorts using audio of a given video)
- **youtube:subscriptions**: YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)
- **youtube:tab**: YouTube Tabs
- **youtube:user**: YouTube user videos; "ytuser:" prefix
- **youtube:watchlater**: Youtube watch later list; ":ytwatchlater" keyword (requires cookies)
- **YoutubeLivestreamEmbed**: YouTube livestream embeds
- **YoutubeYtBe**: youtu.be
- **Zaiko**
- **ZaikoETicket**
- **Zapiks**
- **Zattoo**: [*zattoo*](## "netrc machine")
- **ZattooLive**: [*zattoo*](## "netrc machine")
- **ZattooMovies**: [*zattoo*](## "netrc machine")
- **ZattooRecordings**: [*zattoo*](## "netrc machine")
- **ZDF**
- **ZDFChannel**
- **Zee5**: [*zee5*](## "netrc machine")
- **zee5:series**
- **ZeeNews**: (**Currently broken**)
- **ZenPorn**
- **ZenYandex**
- **ZenYandexChannel**
- **ZetlandDKArticle**
- **Zhihu**
- **zingmp3**: zingmp3.vn
- **zingmp3:album**
- **zingmp3:chart-home**
- **zingmp3:chart-music-video**
- **zingmp3:hub**
- **zingmp3:liveradio**
- **zingmp3:podcast**
- **zingmp3:podcast-episode**
- **zingmp3:user**
- **zingmp3:week-chart**
- **zoom**
- **Zype**
- **generic**: Generic downloader that works on some sites

yt-dlp-2024.09.27/test/
yt-dlp-2024.09.27/test/__init__.py
yt-dlp-2024.09.27/test/conftest.py
import inspect

import pytest

from yt_dlp.networking import RequestHandler
from yt_dlp.networking.common import _REQUEST_HANDLERS
from yt_dlp.utils._utils import _YDLLogger as FakeLogger


@pytest.fixture
def handler(request):
    RH_KEY = getattr(request, 'param', None)
    if not RH_KEY:
        return
    if inspect.isclass(RH_KEY) and issubclass(RH_KEY, RequestHandler):
        handler = RH_KEY
    elif RH_KEY in _REQUEST_HANDLERS:
        handler = _REQUEST_HANDLERS[RH_KEY]
    else:
        pytest.skip(f'{RH_KEY} request handler is not available')

    class HandlerWrapper(handler):
        RH_KEY = handler.RH_KEY

        def __init__(self, **kwargs):
            super().__init__(logger=FakeLogger, **kwargs)

    return HandlerWrapper


@pytest.fixture(autouse=True)
def skip_handler(request, handler):
    """usage: pytest.mark.skip_handler('my_handler', 'reason')"""
    for marker in request.node.iter_markers('skip_handler'):
        if marker.args[0] == handler.RH_KEY:
            pytest.skip(marker.args[1] if len(marker.args) > 1 else '')


@pytest.fixture(autouse=True)
def skip_handler_if(request, handler):
    """usage: pytest.mark.skip_handler_if('my_handler', lambda request: True, 'reason')"""
    for marker in request.node.iter_markers('skip_handler_if'):
        if marker.args[0] == handler.RH_KEY and marker.args[1](request):
            pytest.skip(marker.args[2] if len(marker.args) > 2 else '')


@pytest.fixture(autouse=True)
def skip_handlers_if(request, handler):
    """usage: pytest.mark.skip_handlers_if(lambda request, handler: True, 'reason')"""
    for marker in request.node.iter_markers('skip_handlers_if'):
        if handler and marker.args[0](request, handler):
            pytest.skip(marker.args[1] if len(marker.args) > 1 else '')


def pytest_configure(config):
    config.addinivalue_line(
        'markers', 'skip_handler(handler): skip test for the given handler',
    )
    config.addinivalue_line(
        'markers', 'skip_handler_if(handler): skip test for the given handler if condition is true',
    )
    config.addinivalue_line(
        'markers', 'skip_handlers_if(handler): skip test for handlers when the condition is true',
    )
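# A minimal usage sketch (hypothetical test, not part of this file): the
# `handler` fixture is parametrized indirectly with a request handler key or
# class, and the skip markers registered above take that same key, e.g.
#
#   @pytest.mark.skip_handler('Urllib', 'not supported by urllib')
#   @pytest.mark.parametrize('handler', ['Urllib', 'Requests'], indirect=True)
#   def test_something(handler):
#       with handler() as rh:
#           ...  # rh is a RequestHandler subclass logging through FakeLogger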
yt-dlp-2024.09.27/test/helper.py
import errno
import hashlib
import json
import os.path
import re
import ssl
import sys
import types

import yt_dlp.extractor
from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_os_name
from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port

if 'pytest' in sys.modules:
    import pytest
    is_download_test = pytest.mark.download
else:
    def is_download_test(test_class):
        return test_class


def get_params(override=None):
    PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'parameters.json')
    LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'local_parameters.json')
    with open(PARAMETERS_FILE, encoding='utf-8') as pf:
        parameters = json.load(pf)
    if os.path.exists(LOCAL_PARAMETERS_FILE):
        with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
            parameters.update(json.load(pf))
    if override:
        parameters.update(override)
    return parameters


def try_rm(filename):
    """ Remove a file if it exists """
    try:
        os.remove(filename)
    except OSError as ose:
        if ose.errno != errno.ENOENT:
            raise


def report_warning(message, *args, **kwargs):
    """
    Print the message to stderr, it will be prefixed with 'WARNING:'
    If stderr is a tty file the 'WARNING:' will be colored
    """
    if sys.stderr.isatty() and compat_os_name != 'nt':
        _msg_header = '\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = 'WARNING:'
    output = f'{_msg_header} {message}\n'
    if 'b' in getattr(sys.stderr, 'mode', ''):
        output = output.encode(preferredencoding())
    sys.stderr.write(output)


class FakeYDL(YoutubeDL):
    def __init__(self, override=None):
        # Different instances of the downloader can't share the same dictionary
        # some test set the "sublang" parameter, which would break the md5 checks.
        params = get_params(override=override)
        super().__init__(params, auto_init=False)
        self.result = []

    def to_screen(self, s, *args, **kwargs):
        print(s)

    def trouble(self, s, *args, **kwargs):
        raise Exception(s)

    def download(self, x):
        self.result.append(x)

    def expect_warning(self, regex):
        # Silence an expected warning matching a regex
        old_report_warning = self.report_warning

        def report_warning(self, message, *args, **kwargs):
            if re.match(regex, message):
                return
            old_report_warning(message, *args, **kwargs)
        self.report_warning = types.MethodType(report_warning, self)


def gettestcases(include_onlymatching=False):
    for ie in yt_dlp.extractor.gen_extractors():
        yield from ie.get_testcases(include_onlymatching)


def getwebpagetestcases():
    for ie in yt_dlp.extractor.gen_extractors():
        for tc in ie.get_webpage_testcases():
            tc.setdefault('add_ie', []).append('Generic')
            yield tc


md5 = lambda s: hashlib.md5(s.encode()).hexdigest()


def expect_value(self, got, expected, field):
    if isinstance(expected, str) and expected.startswith('re:'):
        match_str = expected[len('re:'):]
        match_rex = re.compile(match_str)

        self.assertTrue(
            isinstance(got, str),
            f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
        self.assertTrue(
            match_rex.match(got),
            f'field {field} (value: {got!r}) should match {match_str!r}')
    elif isinstance(expected, str) and expected.startswith('startswith:'):
        start_str = expected[len('startswith:'):]
        self.assertTrue(
            isinstance(got, str),
            f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
        self.assertTrue(
            got.startswith(start_str),
            f'field {field} (value: {got!r}) should start with {start_str!r}')
    elif isinstance(expected, str) and expected.startswith('contains:'):
        contains_str = expected[len('contains:'):]
        self.assertTrue(
            isinstance(got, str),
            f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
        self.assertTrue(
            contains_str in got,
            f'field {field} (value: {got!r}) should contain {contains_str!r}')
    elif isinstance(expected, type):
        self.assertTrue(
            isinstance(got, expected),
            f'Expected type {expected!r} for field {field}, but got value {got!r} of type {type(got)!r}')
    elif isinstance(expected, dict) and isinstance(got, dict):
        expect_dict(self, got, expected)
    elif isinstance(expected, list) and isinstance(got, list):
        self.assertEqual(
            len(expected), len(got),
            f'Expect a list of length {len(expected)}, but got a list of length {len(got)} for field {field}')
        for index, (item_got, item_expected) in enumerate(zip(got, expected)):
            type_got = type(item_got)
            type_expected = type(item_expected)
            self.assertEqual(
                type_expected, type_got,
                f'Type mismatch for list item at index {index} for field {field}, '
                f'expected {type_expected!r}, got {type_got!r}')
            expect_value(self, item_got, item_expected, field)
    else:
        if isinstance(expected, str) and expected.startswith('md5:'):
            self.assertTrue(
                isinstance(got, str),
                f'Expected field {field} to be a unicode object, but got value {got!r} of type {type(got)!r}')
            got = 'md5:' + md5(got)
        elif isinstance(expected, str) and re.match(r'^(?:min|max)?count:\d+', expected):
            self.assertTrue(
                isinstance(got, (list, dict)),
                f'Expected field {field} to be a list or a dict, but it is of type {type(got).__name__}')
            op, _, expected_num = expected.partition(':')
            expected_num = int(expected_num)
            if op == 'mincount':
                assert_func = assertGreaterEqual
                msg_tmpl = 'Expected %d items in field %s, but only got %d'
            elif op == 'maxcount':
                assert_func = assertLessEqual
                msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
            elif op == 'count':
                assert_func = assertEqual
                msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
            else:
                assert False
            assert_func(
                self, len(got), expected_num,
                msg_tmpl % (expected_num, field, len(got)))
            return
        self.assertEqual(
            expected, got,
            f'Invalid value for field {field}, expected {expected!r}, got {got!r}')
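# Quick reference for the expected-value mini-language handled by
# expect_value() above (each prefix is a plain string in a test definition):
#   're:<regex>'          value must match the regex
#   'startswith:<str>'    value must start with <str>
#   'contains:<str>'      value must contain <str>
#   'md5:<hexdigest>'     md5 of the string value must equal <hexdigest>
#   'mincount:N' / 'maxcount:N' / 'count:N'
#                         list/dict must have at least / at most / exactly N items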
def expect_dict(self, got_dict, expected_dict):
    for info_field, expected in expected_dict.items():
        got = got_dict.get(info_field)
        expect_value(self, got, expected, info_field)


def sanitize_got_info_dict(got_dict):
    IGNORED_FIELDS = (
        *YoutubeDL._format_fields,

        # Lists
        'formats', 'thumbnails', 'subtitles', 'automatic_captions', 'comments', 'entries',

        # Auto-generated
        'autonumber', 'playlist', 'format_index', 'video_ext', 'audio_ext', 'duration_string',
        'epoch', 'n_entries', 'fulltitle', 'extractor', 'extractor_key', 'filename', 'filepath', 'infojson_filename', 'original_url',

        # Only live_status needs to be checked
        'is_live', 'was_live',
    )

    IGNORED_PREFIXES = ('', 'playlist', 'requested', 'webpage')

    def sanitize(key, value):
        if isinstance(value, str) and len(value) > 100 and key != 'thumbnail':
            return f'md5:{md5(value)}'
        elif isinstance(value, list) and len(value) > 10:
            return f'count:{len(value)}'
        elif key.endswith('_count') and isinstance(value, int):
            return int
        return value

    test_info_dict = {
        key: sanitize(key, value) for key, value in got_dict.items()
        if value is not None and key not in IGNORED_FIELDS and (
            not any(key.startswith(f'{prefix}_') for prefix in IGNORED_PREFIXES)
            or key == '_old_archive_ids')
    }

    # display_id may be generated from id
    if test_info_dict.get('display_id') == test_info_dict.get('id'):
        test_info_dict.pop('display_id')

    # Remove deprecated fields
    for old in YoutubeDL._deprecated_multivalue_fields:
        test_info_dict.pop(old, None)

    # release_year may be generated from release_date
    if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
        test_info_dict.pop('release_year')

    # Check url for flat entries
    if got_dict.get('_type', 'video') != 'video' and got_dict.get('url'):
        test_info_dict['url'] = got_dict['url']

    return test_info_dict
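# Sketch of what the sanitization above produces (values illustrative): long
# strings and large lists are collapsed into the same mini-language that
# expect_value() understands, so a failing test prints copy-pasteable
# expectations, e.g.
#   {'description': <string longer than 100 chars>}  ->  {'description': 'md5:...'}
#   {'tags': <list of 12 items>}                     ->  {'tags': 'count:12'}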
def expect_info_dict(self, got_dict, expected_dict):
    expect_dict(self, got_dict, expected_dict)
    # Check for the presence of mandatory fields
    if got_dict.get('_type') not in ('playlist', 'multi_video'):
        mandatory_fields = ['id', 'title']
        if expected_dict.get('ext'):
            mandatory_fields.extend(('url', 'ext'))
        for key in mandatory_fields:
            self.assertTrue(got_dict.get(key), f'Missing mandatory field {key}')
    # Check for mandatory fields that are automatically set by YoutubeDL
    if got_dict.get('_type', 'video') == 'video':
        for key in ['webpage_url', 'extractor', 'extractor_key']:
            self.assertTrue(got_dict.get(key), f'Missing field: {key}')

    test_info_dict = sanitize_got_info_dict(got_dict)

    missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
    if missing_keys:
        def _repr(v):
            if isinstance(v, str):
                return "'{}'".format(v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n'))
            elif isinstance(v, type):
                return v.__name__
            else:
                return repr(v)
        info_dict_str = ''.join(
            f'    {_repr(k)}: {_repr(v)},\n'
            for k, v in test_info_dict.items() if k not in missing_keys)
        if info_dict_str:
            info_dict_str += '\n'
        info_dict_str += ''.join(
            f'    {_repr(k)}: {_repr(test_info_dict[k])},\n'
            for k in missing_keys)
        info_dict_str = '\n\'info_dict\': {\n' + info_dict_str + '},\n'
        write_string(info_dict_str.replace('\n', '\n        '), out=sys.stderr)
        self.assertFalse(
            missing_keys,
            'Missing keys in test definition: {}'.format(', '.join(sorted(missing_keys))))


def assertRegexpMatches(self, text, regexp, msg=None):
    if hasattr(self, 'assertRegexp'):
        return self.assertRegexp(text, regexp, msg)
    else:
        m = re.match(regexp, text)
        if not m:
            note = f'Regexp didn\'t match: {regexp!r} not found'
            if len(text) < 1000:
                note += f' in {text!r}'
            if msg is None:
                msg = note
            else:
                msg = note + ', ' + msg
            self.assertTrue(m, msg)


def assertGreaterEqual(self, got, expected, msg=None):
    if not (got >= expected):
        if msg is None:
            msg = f'{got!r} not greater than or equal to {expected!r}'
        self.assertTrue(got >= expected, msg)


def assertLessEqual(self, got, expected, msg=None):
    if not (got <= expected):
        if msg is None:
            msg = f'{got!r} not less than or equal to {expected!r}'
        self.assertTrue(got <= expected, msg)


def assertEqual(self, got, expected, msg=None):
    if got != expected:
        if msg is None:
            msg = f'{got!r} not equal to {expected!r}'
        self.assertTrue(got == expected, msg)


def expect_warnings(ydl, warnings_re):
    real_warning = ydl.report_warning

    def _report_warning(w, *args, **kwargs):
        if not any(re.search(w_re, w) for w_re in warnings_re):
            real_warning(w, *args, **kwargs)

    ydl.report_warning = _report_warning


def http_server_port(httpd):
    if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
        # In Jython SSLSocket is not a subclass of socket.socket
        sock = httpd.socket.sock
    else:
        sock = httpd.socket
    return sock.getsockname()[1]


def verify_address_availability(address):
    if find_available_port(address) is None:
        pytest.skip(f'Unable to bind to source address {address} (address may not exist)')


def validate_and_send(rh, req):
    rh.validate(req)
    return rh.send(req)
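# Minimal usage sketch (hypothetical caller, not part of this file): a
# download test typically runs
#   expect_info_dict(self, ydl.extract_info(url, download=False), tc['info_dict'])
# and, when keys are missing from the test definition, gets a ready-to-paste
# 'info_dict': {...} block written to stderr by the code above.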
"max_downloads": null, "overwrites": null, "nopart": false, "noprogress": false, "outtmpl": "%(id)s.%(ext)s", "password": null, "playliststart": 1, "prefer_free_formats": false, "quiet": false, "ratelimit": null, "rejecttitle": null, "retries": 10, "simulate": false, "subtitleslang": null, "subtitlesformat": "best", "test": true, "updatetime": true, "usenetrc": false, "username": null, "verbose": true, "writedescription": false, "writeinfojson": true, "writeannotations": false, "writelink": false, "writeurllink": false, "writewebloclink": false, "writedesktoplink": false, "writesubtitles": false, "allsubtitles": false, "listsubtitles": false, "fixup": "never", "allow_playlist_files": false } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/test_InfoExtractor.py��������������������������������������������������������0000664�0000000�0000000�00000304602�14675634471�0020537�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import http.server import threading from test.helper import FakeYDL, expect_dict, expect_value, http_server_port from yt_dlp.compat import compat_etree_fromstring from yt_dlp.extractor import YoutubeIE, get_info_extractor from yt_dlp.extractor.common import InfoExtractor from yt_dlp.utils import ( ExtractorError, RegexNotFoundError, encode_data_uri, strip_jsonp, ) TEAPOT_RESPONSE_STATUS = 418 TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>" class InfoExtractorTestRequestHandler(http.server.BaseHTTPRequestHandler): def log_message(self, format, *args): pass def do_GET(self): if self.path == '/teapot': self.send_response(TEAPOT_RESPONSE_STATUS) self.send_header('Content-Type', 'text/html; charset=utf-8') self.end_headers() self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) else: assert False class DummyIE(InfoExtractor): def _sort_formats(self, formats, field_preference=[]): self._downloader.sort_formats( {'formats': formats, '_format_sort_fields': field_preference}) class TestInfoExtractor(unittest.TestCase): def setUp(self): self.ie = DummyIE(FakeYDL()) def test_ie_key(self): self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) def test_html_search_regex(self): html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>' search = lambda re, *args: self.ie._html_search_regex(re, html, *args) self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video') def test_opengraph(self): ie = self.ie html = ''' <meta name="og:title" content='Foo'/> <meta content="Some video's description " name="og:description"/> <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/> <meta content='application/x-shockwave-flash' property='og:video:type'> <meta content='Foo' property=og:foobar> <meta name="og:test1" content='foo > < bar'/> <meta name="og:test2" content="foo >//< 
class TestInfoExtractor(unittest.TestCase):
    def setUp(self):
        self.ie = DummyIE(FakeYDL())

    def test_ie_key(self):
        self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)

    def test_html_search_regex(self):
        html = '<p id="foo">Watch this <a href="http://www.youtube.com/watch?v=BaW_jenozKc">video</a></p>'
        search = lambda re, *args: self.ie._html_search_regex(re, html, *args)
        self.assertEqual(search(r'<p id="foo">(.+?)</p>', 'foo'), 'Watch this video')

    def test_opengraph(self):
        ie = self.ie
        html = '''
            <meta name="og:title" content='Foo'/>
            <meta content="Some video's description " name="og:description"/>
            <meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/>
            <meta content='application/x-shockwave-flash' property='og:video:type'>
            <meta content='Foo' property=og:foobar>
            <meta name="og:test1" content='foo > < bar'/>
            <meta name="og:test2" content="foo >//< bar"/>
            <meta property=og-test3 content='Ill-formatted opengraph'/>
            <meta property=og:test4 content=unquoted-value/>
            '''
        self.assertEqual(ie._og_search_title(html), 'Foo')
        self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
        self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
        self.assertEqual(ie._og_search_video_url(html, default=None), None)
        self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
        self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
        self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
        self.assertEqual(ie._og_search_property('test3', html), 'Ill-formatted opengraph')
        self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar')
        self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True)
        self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True)
        self.assertEqual(ie._og_search_property('test4', html), 'unquoted-value')

    def test_html_search_meta(self):
        ie = self.ie
        html = '''
            <meta name="a" content="1" />
            <meta name='b' content='2'>
            <meta name="c" content='3'>
            <meta name=d content='4'>
            <meta property="e" content='5' >
            <meta content="6" name="f">
        '''
        self.assertEqual(ie._html_search_meta('a', html), '1')
        self.assertEqual(ie._html_search_meta('b', html), '2')
        self.assertEqual(ie._html_search_meta('c', html), '3')
        self.assertEqual(ie._html_search_meta('d', html), '4')
        self.assertEqual(ie._html_search_meta('e', html), '5')
        self.assertEqual(ie._html_search_meta('f', html), '6')
        self.assertEqual(ie._html_search_meta(('a', 'b', 'c'), html), '1')
        self.assertEqual(ie._html_search_meta(('c', 'b', 'a'), html), '3')
        self.assertEqual(ie._html_search_meta(('z', 'x', 'c'), html), '3')
        self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True)
        self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
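    # Note on the tuple form exercised above: both _og_search_property() and
    # _html_search_meta() accept a tuple of names and return the value of the
    # first name that is actually present, which is why ('z', 'x', 'c')
    # resolves to '3' (the value of 'c') while ('a', 'b', 'c') resolves to '1'.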
    def test_search_json_ld_realworld(self):
        _TESTS = [
            # https://github.com/ytdl-org/youtube-dl/issues/23306
            (
                r'''<script type="application/ld+json">
{
"@context": "http://schema.org/",
"@type": "VideoObject",
"name": "1 On 1 With Kleio",
"url": "https://www.eporner.com/hd-porn/xN49A1cT3eB/1-On-1-With-Kleio/",
"duration": "PT0H12M23S",
"thumbnailUrl": ["https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg", "https://imggen.eporner.com/780814/1920/1080/9.jpg"],
"contentUrl": "https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4",
"embedUrl": "https://www.eporner.com/embed/xN49A1cT3eB/1-On-1-With-Kleio/",
"image": "https://static-eu-cdn.eporner.com/thumbs/static4/7/78/780/780814/9_360.jpg",
"width": "1920",
"height": "1080",
"encodingFormat": "mp4",
"bitrate": "6617kbps",
"isFamilyFriendly": "False",
"description": "Kleio Valentien",
"uploadDate": "2015-12-05T21:24:35+01:00",
"interactionStatistic": {
"@type": "InteractionCounter",
"interactionType": {"@type": "http://schema.org/WatchAction"},
"userInteractionCount": 1120958
},
"aggregateRating": {
"@type": "AggregateRating",
"ratingValue": "88",
"ratingCount": "630",
"bestRating": "100",
"worstRating": "0"
},
"actor": [{
"@type": "Person",
"name": "Kleio Valentien",
"url": "https://www.eporner.com/pornstar/kleio-valentien/"
}]}
</script>''',
                {
                    'title': '1 On 1 With Kleio',
                    'description': 'Kleio Valentien',
                    'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
                    'timestamp': 1449347075,
                    'duration': 743.0,
                    'view_count': 1120958,
                    'width': 1920,
                    'height': 1080,
                },
                {},
            ),
            (
                r'''<script type="application/ld+json">
{
  "@context": "https://schema.org",
  "@graph": [
    {
      "@type": "NewsArticle",
      "mainEntityOfPage": {
        "@type": "WebPage",
        "@id": "https://www.ant1news.gr/Society/article/620286/symmoria-anilikon-dikigoros-thymaton-ithelan-na-toys-apoteleiosoyn"
      },
      "headline": "Συμμορία ανηλίκων – δικηγόρος θυμάτων: ήθελαν να τους αποτελειώσουν",
      "name": "Συμμορία ανηλίκων – δικηγόρος θυμάτων: ήθελαν να τους αποτελειώσουν",
      "description": "Τα παιδιά δέχθηκαν την επίθεση επειδή αρνήθηκαν να γίνουν μέλη της συμμορίας, ανέφερε ο Γ. Ζαχαρόπουλος.",
      "image": {
        "@type": "ImageObject",
        "url": "https://ant1media.azureedge.net/imgHandler/1100/a635c968-be71-447c-bf9c-80d843ece21e.jpg",
        "width": 1100,
        "height": 756
      },
      "datePublished": "2021-11-10T08:50:00+03:00",
      "dateModified": "2021-11-10T08:52:53+03:00",
      "author": {
        "@type": "Person",
        "@id": "https://www.ant1news.gr/",
        "name": "Ant1news",
        "image": "https://www.ant1news.gr/images/logo-e5d7e4b3e714c88e8d2eca96130142f6.png",
        "url": "https://www.ant1news.gr/"
      },
      "publisher": {
        "@type": "Organization",
        "@id": "https://www.ant1news.gr#publisher",
        "name": "Ant1news",
        "url": "https://www.ant1news.gr",
        "logo": {
          "@type": "ImageObject",
          "url": "https://www.ant1news.gr/images/logo-e5d7e4b3e714c88e8d2eca96130142f6.png",
          "width": 400,
          "height": 400
        },
        "sameAs": [
          "https://www.facebook.com/Ant1news.gr",
          "https://twitter.com/antennanews",
          "https://www.youtube.com/channel/UC0smvAbfczoN75dP0Hw4Pzw",
          "https://www.instagram.com/ant1news/"
        ]
      },
      "keywords": "μαχαίρωμα,συμμορία ανηλίκων,ΕΙΔΗΣΕΙΣ,ΕΙΔΗΣΕΙΣ ΣΗΜΕΡΑ,ΝΕΑ,Κοινωνία - Ant1news",
      "articleSection": "Κοινωνία"
    }
  ]
}
</script>''',
                {
                    'timestamp': 1636523400,
                    'title': 'md5:91fe569e952e4d146485740ae927662b',
                },
                {'expected_type': 'NewsArticle'},
            ),
            (
                r'''<script type="application/ld+json">
{"url":"/vrtnu/a-z/het-journaal/2021/het-journaal-het-journaal-19u-20211231/",
"name":"Het journaal 19u",
"description":"Het journaal 19u van vrijdag 31 december 2021.",
"potentialAction":{"url":"https://vrtnu.page.link/pfVy6ihgCAJKgHqe8","@type":"ShareAction"},
"mainEntityOfPage":{"@id":"1640092242445","@type":"WebPage"},
"publication":[{
"startDate":"2021-12-31T19:00:00.000+01:00",
"endDate":"2022-01-30T23:55:00.000+01:00",
"publishedBy":{"name":"een","@type":"Organization"},
"publishedOn":{"url":"https://www.vrt.be/vrtnu/","name":"VRT NU","@type":"BroadcastService"},
"@id":"pbs-pub-3a7ec233-da95-4c1e-9b2b-cf5fdfebcbe8",
"@type":"BroadcastEvent"
}],
"video":{
"name":"Het journaal - Aflevering 365 (Seizoen 2021)",
"description":"Het journaal 19u van vrijdag 31 december 2021. Bekijk aflevering 365 van seizoen 2021 met VRT NU via de site of app.",
"thumbnailUrl":"//images.vrt.be/width1280/2021/12/31/80d5ed00-6a64-11ec-b07d-02b7b76bf47f.jpg",
"expires":"2022-01-30T23:55:00.000+01:00",
"hasPart":[
{"name":"Explosie Turnhout","startOffset":70,"@type":"Clip"},
{"name":"Jaarwisseling","startOffset":440,"@type":"Clip"},
{"name":"Natuurbranden Colorado","startOffset":1179,"@type":"Clip"},
{"name":"Klimaatverandering","startOffset":1263,"@type":"Clip"},
{"name":"Zacht weer","startOffset":1367,"@type":"Clip"},
{"name":"Financiële balans","startOffset":1383,"@type":"Clip"},
{"name":"Club Brugge","startOffset":1484,"@type":"Clip"},
{"name":"Mentale gezondheid bij topsporters","startOffset":1575,"@type":"Clip"},
{"name":"Olympische Winterspelen","startOffset":1728,"@type":"Clip"},
{"name":"Sober oudjaar in Nederland","startOffset":1873,"@type":"Clip"}
],
"duration":"PT34M39.23S",
"uploadDate":"2021-12-31T19:00:00.000+01:00",
"@id":"vid-9457d0c6-b8ac-4aba-b5e1-15aa3a3295b5",
"@type":"VideoObject"
},
"genre":["Nieuws en actua"],
"episodeNumber":365,
"partOfSeries":{"name":"Het journaal","@id":"222831405527","@type":"TVSeries"},
"partOfSeason":{"name":"Seizoen 2021","@id":"961809365527","@type":"TVSeason"},
"@context":"https://schema.org","@id":"961685295527","@type":"TVEpisode"}</script>
''',
                {
                    'chapters': [
                        {'title': 'Explosie Turnhout', 'start_time': 70, 'end_time': 440},
                        {'title': 'Jaarwisseling', 'start_time': 440, 'end_time': 1179},
                        {'title': 'Natuurbranden Colorado', 'start_time': 1179, 'end_time': 1263},
                        {'title': 'Klimaatverandering', 'start_time': 1263, 'end_time': 1367},
                        {'title': 'Zacht weer', 'start_time': 1367, 'end_time': 1383},
                        {'title': 'Financiële balans', 'start_time': 1383, 'end_time': 1484},
                        {'title': 'Club Brugge', 'start_time': 1484, 'end_time': 1575},
                        {'title': 'Mentale gezondheid bij topsporters', 'start_time': 1575, 'end_time': 1728},
                        {'title': 'Olympische Winterspelen', 'start_time': 1728, 'end_time': 1873},
                        {'title': 'Sober oudjaar in Nederland', 'start_time': 1873, 'end_time': 2079.23},
                    ],
                    'title': 'Het journaal - Aflevering 365 (Seizoen 2021)',
                },
                {},
            ),
            (
                # test multiple thumbnails in a list
                r'''
<script type="application/ld+json">
{"@context":"https://schema.org",
"@type":"VideoObject",
"thumbnailUrl":["https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"]}
</script>''',
                {
                    'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
                },
                {},
            ),
            (
                # test single thumbnail
                r'''
<script type="application/ld+json">
{"@context":"https://schema.org",
"@type":"VideoObject",
"thumbnailUrl":"https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"}
</script>''',
                {
                    'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
                },
                {},
            ),
        ]
        for html, expected_dict, search_json_ld_kwargs in _TESTS:
            expect_dict(
                self,
                self.ie._search_json_ld(html, None, **search_json_ld_kwargs),
                expected_dict,
            )
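    # The VRT case above doubles as a chapters check: each JSON-LD hasPart
    # Clip becomes a chapter starting at its startOffset and ending at the
    # next clip's startOffset (the last one ends at the declared duration,
    # PT34M39.23S -> 2079.23 seconds).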
    def test_download_json(self):
        uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
        self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'})
        uri = encode_data_uri(b'callback({"foo": "blah"})', 'application/javascript')
        self.assertEqual(self.ie._download_json(uri, None, transform_source=strip_jsonp), {'foo': 'blah'})
        uri = encode_data_uri(b'{"foo": invalid}', 'application/json')
        self.assertRaises(ExtractorError, self.ie._download_json, uri, None)
        self.assertEqual(self.ie._download_json(uri, None, fatal=False), None)
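    # The data: URIs above keep test_download_json() offline: encode_data_uri()
    # from yt_dlp.utils base64-encodes the payload, producing e.g.
    #   data:application/json;base64,eyJmb28iOiAiYmxhaCJ9
    # which _download_json() can then fetch like any other URL.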
    def test_parse_html5_media_entries(self):
        # inline video tag
        expect_dict(
            self,
            self.ie._parse_html5_media_entries(
                'https://127.0.0.1/video.html',
                r'<html><video src="/vid.mp4" /></html>', None)[0],
            {
                'formats': [{
                    'url': 'https://127.0.0.1/vid.mp4',
                }],
            })

        # from https://www.r18.com/
        # with kpbs in label
        expect_dict(
            self,
            self.ie._parse_html5_media_entries(
                'https://www.r18.com/',
                r'''
                <video id="samplevideo_amateur" class="js-samplevideo video-js vjs-default-skin vjs-big-play-centered" controls preload="auto" width="400" height="225" poster="//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg">
                    <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4" type="video/mp4"  res="240" label="300kbps">
                    <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4" type="video/mp4"  res="480" label="1000kbps">
                    <source id="video_source" src="https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4" type="video/mp4"  res="740" label="1500kbps">
                    <p>Your browser does not support the video tag.</p>
                </video>
                ''', None)[0],
            {
                'formats': [{
                    'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_sm_w.mp4',
                    'ext': 'mp4',
                    'format_id': '300kbps',
                    'height': 240,
                    'tbr': 300,
                }, {
                    'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dm_w.mp4',
                    'ext': 'mp4',
                    'format_id': '1000kbps',
                    'height': 480,
                    'tbr': 1000,
                }, {
                    'url': 'https://awscc3001.r18.com/litevideo/freepv/m/mgm/mgmr105/mgmr105_dmb_w.mp4',
                    'ext': 'mp4',
                    'format_id': '1500kbps',
                    'height': 740,
                    'tbr': 1500,
                }],
                'thumbnail': '//pics.r18.com/digital/amateur/mgmr105/mgmr105jp.jpg',
            })

        # from https://www.csfd.cz/
        # with width and height
        expect_dict(
            self,
            self.ie._parse_html5_media_entries(
                'https://www.csfd.cz/',
                r'''
                <video width="770" height="328" preload="none" controls poster="https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360" >
                    <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4" type="video/mp4" width="640" height="360">
                    <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4" type="video/mp4" width="1280" height="720">
                    <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4" type="video/mp4" width="1920" height="1080">
                    <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm" type="video/webm" width="640" height="360">
                    <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm" type="video/webm" width="1280" height="720">
                    <source src="https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm" type="video/webm" width="1920" height="1080">
                    <track src="https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt" type="text/x-srt" kind="subtitles" srclang="cs" label="cs">
                </video>
                ''', None)[0],
            {
                'formats': [{
                    'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327358_eac647.mp4',
                    'ext': 'mp4',
                    'width': 640,
                    'height': 360,
                }, {
                    'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327360_3d2646.mp4',
                    'ext': 'mp4',
                    'width': 1280,
                    'height': 720,
                }, {
                    'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327356_91f258.mp4',
                    'ext': 'mp4',
                    'width': 1920,
                    'height': 1080,
                }, {
                    'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327359_962b4a.webm',
                    'ext': 'webm',
                    'width': 640,
                    'height': 360,
                }, {
                    'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327361_6feee0.webm',
                    'ext': 'webm',
                    'width': 1280,
                    'height': 720,
                }, {
                    'url': 'https://video.csfd.cz/files/videos/157/750/157750813/163327357_8ab472.webm',
                    'ext': 'webm',
                    'width': 1920,
                    'height': 1080,
                }],
                'subtitles': {
                    'cs': [{'url': 'https://video.csfd.cz/files/subtitles/163/344/163344115_4c388b.srt'}],
                },
                'thumbnail': 'https://img.csfd.cz/files/images/film/video/preview/163/344/163344118_748d20.png?h360',
            })

        # from https://tamasha.com/v/Kkdjw
        # with height in label
        expect_dict(
            self,
            self.ie._parse_html5_media_entries(
                'https://tamasha.com/v/Kkdjw',
                r'''
                <video crossorigin="anonymous">
                    <source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4" label="AUTO" res="0"/>
                    <source src="https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4" type="video/mp4" label="240p" res="240"/>
                    <source src="https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4" type="video/mp4" label="144p" res="144"/>
                </video>
                ''', None)[0],
            {
                'formats': [{
                    'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4',
                }, {
                    'url': 'https://s-v2.tamasha.com/statics/videos_file/19/8f/Kkdjw_198feff8577d0057536e905cce1fb61438dd64e0_n_240.mp4',
                    'ext': 'mp4',
                    'format_id': '240p',
                    'height': 240,
                }, {
                    'url': 'https://s-v2.tamasha.com/statics/videos_file/20/00/Kkdjw_200041c66f657fc967db464d156eafbc1ed9fe6f_n_144.mp4',
                    'ext': 'mp4',
                    'format_id': '144p',
                    'height': 144,
                }],
            })

        # from https://www.directvnow.com
        # with data-src
        expect_dict(
            self,
            self.ie._parse_html5_media_entries(
                'https://www.directvnow.com',
                r'''
                <video id="vid1" class="header--video-masked active" muted playsinline>
                    <source data-src="https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4" type="video/mp4" />
                </video>
                ''', None)[0],
            {
                'formats': [{
                    'ext': 'mp4',
                    'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4',
                }],
            })

        # from https://www.directvnow.com
        # with data-src
        expect_dict(
            self,
            self.ie._parse_html5_media_entries(
                'https://www.directvnow.com',
                r'''
                <video id="vid1" class="header--video-masked active" muted playsinline>
                    <source data-src="https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4" type="video/mp4" />
                </video>
                ''', None)[0],
            {
                'formats': [{
                    'url': 'https://cdn.directv.com/content/dam/dtv/prod/website_directvnow-international/videos/DTVN_hdr_HBO_v3.mp4',
                    'ext': 'mp4',
                }],
            })

        # from https://www.klarna.com/uk/
        # with data-video-src
        expect_dict(
            self,
            self.ie._parse_html5_media_entries(
                'https://www.directvnow.com',
                r'''
                <video loop autoplay muted class="responsive-video block-kl__video video-on-medium">
                    <source src="" data-video-desktop data-video-src="https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4" type="video/mp4" />
                </video>
                ''', None)[0],
            {
                'formats': [{
                    'url': 'https://www.klarna.com/uk/wp-content/uploads/sites/11/2019/01/KL062_Smooth3_0_DogWalking_5s_920x080_.mp4',
                    'ext': 'mp4',
                }],
            })

        # from https://0000.studio/
        # with type attribute but without extension in URL
        expect_dict(
            self,
            self.ie._parse_html5_media_entries(
                'https://0000.studio',
                r'''
                <video src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92"
                    controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain">
                </video>
                ''', None)[0],
            {
                'formats': [{
                    'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92',
                    'ext': 'mp4',
                }],
            })
src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92" controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain"> </video> ''', None)[0], { 'formats': [{ 'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92', 'ext': 'mp4', }], }) def test_extract_jwplayer_data_realworld(self): # from http://www.suffolk.edu/sjc/ expect_dict( self, self.ie._extract_jwplayer_data(r''' <script type='text/javascript'> jwplayer('my-video').setup({ file: 'rtmp://192.138.214.154/live/sjclive', fallback: 'true', width: '95%', aspectratio: '16:9', primary: 'flash', mediaid:'XEgvuql4' }); </script> ''', None, require_title=False), { 'id': 'XEgvuql4', 'formats': [{ 'url': 'rtmp://192.138.214.154/live/sjclive', 'ext': 'flv', }], }) # from https://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary/ expect_dict( self, self.ie._extract_jwplayer_data(r''' <script type="text/javascript"> jwplayer("mediaplayer").setup({ 'videoid': "7564", 'width': "100%", 'aspectratio': "16:9", 'stretching': "exactfit", 'autostart': 'false', 'flashplayer': "https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf", 'file': "https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv", 'image': "https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg", 'filefallback': "https://cdn.pornoxo.com/key=9ZPsTR5EvPLQrBaak2MUGA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/m_4b2157147afe5efa93ce1978e0265289c193874e02597.mp4", 'logo.hide': true, 'skin': "https://t04.vipstreamservice.com/jwplayer/skin/modieus-blk.zip", 'plugins': "https://t04.vipstreamservice.com/jwplayer/dock/dockableskinnableplugin.swf", 'dockableskinnableplugin.piclink': "/index.php?key=ajax-videothumbsn&vid=7564&data=2009-12--14--4b2157147afe5efa93ce1978e0265289c193874e02597.flv--17370", 'controlbar': 'bottom', 'modes': [ {type: 'flash', src: 'https://t04.vipstreamservice.com/jwplayer/v5.10/player.swf'} ], 'provider': 'http' }); //noinspection JSAnnotator invideo.setup({ adsUrl: "/banner-iframe/?zoneId=32", adsUrl2: "", autostart: false }); </script> ''', 'dummy', require_title=False), { 'thumbnail': 'https://t03.vipstreamservice.com/thumbs/pxo-full/2009-12/14/a4b2157147afe5efa93ce1978e0265289c193874e02597.flv-full-13.jpg', 'formats': [{ 'url': 'https://cdn.pornoxo.com/key=MF+oEbaxqTKb50P-w9G3nA,end=1489689259,ip=104.199.146.27/ip=104.199.146.27/speed=6573765/buffer=3.0/2009-12/4b2157147afe5efa93ce1978e0265289c193874e02597.flv', 'ext': 'flv', }], }) # from http://www.indiedb.com/games/king-machine/videos expect_dict( self, self.ie._extract_jwplayer_data(r''' <script> jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/\/www.indiedb.com\/","displaytitle":false,"autostart":false,"repeat":false,"title":"king machine trailer 1","sharing":{"link":"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1","code":"<iframe width=\"560\" height=\"315\" src=\"http:\/\/www.indiedb.com\/media\/iframe\/1522983\" frameborder=\"0\" allowfullscreen><\/iframe><br><a href=\"http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1\">king 
machine trailer 1 - Indie DB<\/a>"},"related":{"file":"http:\/\/rss.indiedb.com\/media\/recommended\/1522983\/feed\/rss.xml","dimensions":"160x120","onclick":"link"},"sources":[{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode_mp4\/king-machine-trailer.mp4","label":"360p SD","default":"true"},{"file":"http:\/\/cdn.dbolical.com\/cache\/videos\/games\/1\/50\/49678\/encode720p_mp4\/king-machine-trailer.mp4","label":"720p HD"}],"image":"http:\/\/media.indiedb.com\/cache\/images\/games\/1\/50\/49678\/thumb_620x2000\/king-machine-trailer.mp4.jpg","advertising":{"client":"vast","tag":"http:\/\/ads.intergi.com\/adrawdata\/3.0\/5205\/4251742\/0\/1013\/ADTECH;cors=yes;width=560;height=315;referring_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;content_url=http:\/\/www.indiedb.com\/games\/king-machine\/videos\/king-machine-trailer-1;media_id=1522983;title=king+machine+trailer+1;device=__DEVICE__;model=__MODEL__;os=Windows+OS;osversion=__OSVERSION__;ua=__UA__;ip=109.171.17.81;uniqueid=1522983;tags=__TAGS__;number=58cac25928151;time=1489683033"},"width":620,"height":349}).once("play", function(event) { videoAnalytics("play"); }).once("complete", function(event) { videoAnalytics("completed"); }); </script> ''', 'dummy'), { 'title': 'king machine trailer 1', 'thumbnail': 'http://media.indiedb.com/cache/images/games/1/50/49678/thumb_620x2000/king-machine-trailer.mp4.jpg', 'formats': [{ 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode_mp4/king-machine-trailer.mp4', 'height': 360, 'ext': 'mp4', }, { 'url': 'http://cdn.dbolical.com/cache/videos/games/1/50/49678/encode720p_mp4/king-machine-trailer.mp4', 'height': 720, 'ext': 'mp4', }], }) def test_parse_m3u8_formats(self): _TEST_CASES = [ ( # https://github.com/ytdl-org/youtube-dl/issues/11995 # http://teamcoco.com/video/clueless-gamer-super-bowl-for-honor 'img_bipbop_adv_example_fmp4', 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', [{ 'format_id': 'aud1-English', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a1/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'language': 'en', 'ext': 'mp4', 'protocol': 'm3u8_native', 'audio_ext': 'mp4', }, { 'format_id': 'aud2-English', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a2/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'language': 'en', 'ext': 'mp4', 'protocol': 'm3u8_native', 'audio_ext': 'mp4', }, { 'format_id': 'aud3-English', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/a3/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'language': 'en', 'ext': 'mp4', 'protocol': 'm3u8_native', 'audio_ext': 'mp4', }, { 'format_id': '530', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 480, 'height': 270, 'vcodec': 'avc1.640015', }, { 'format_id': '561', 'url': 
'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 480, 'height': 270, 'vcodec': 'avc1.640015', }, { 'format_id': '753', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v2/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 480, 'height': 270, 'vcodec': 'avc1.640015', }, { 'format_id': '895', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v3/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 640, 'height': 360, 'vcodec': 'avc1.64001e', }, { 'format_id': '926', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v3/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 640, 'height': 360, 'vcodec': 'avc1.64001e', }, { 'format_id': '1118', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v3/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 640, 'height': 360, 'vcodec': 'avc1.64001e', }, { 'format_id': '1265', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v4/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 768, 'height': 432, 'vcodec': 'avc1.64001e', }, { 'format_id': '1295', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v4/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 768, 'height': 432, 'vcodec': 'avc1.64001e', }, { 'format_id': '1487', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v4/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 768, 'height': 432, 'vcodec': 'avc1.64001e', }, { 'format_id': '2168', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v5/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 960, 'height': 540, 'vcodec': 'avc1.640020', }, { 'format_id': '2198', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v5/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 960, 'height': 540, 'vcodec': 'avc1.640020', 
}, { 'format_id': '2390', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v5/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 960, 'height': 540, 'vcodec': 'avc1.640020', }, { 'format_id': '3168', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v6/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1280, 'height': 720, 'vcodec': 'avc1.640020', }, { 'format_id': '3199', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v6/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1280, 'height': 720, 'vcodec': 'avc1.640020', }, { 'format_id': '3391', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v6/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1280, 'height': 720, 'vcodec': 'avc1.640020', }, { 'format_id': '4670', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v7/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '4701', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v7/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '4893', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v7/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '6170', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v8/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '6200', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v8/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '6392', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v8/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 
'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '7968', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v9/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '7998', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v9/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }, { 'format_id': '8190', 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/v9/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/img_bipbop_adv_example_fmp4/master.m3u8', 'ext': 'mp4', 'protocol': 'm3u8_native', 'width': 1920, 'height': 1080, 'vcodec': 'avc1.64002a', }], {}, ), ( 'bipbop_16x9', 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', [{ 'format_id': 'bipbop_audio-BipBop Audio 2', 'format_index': None, 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/alternate_audio_aac/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', 'language': 'eng', 'ext': 'mp4', 'protocol': 'm3u8_native', 'preference': None, 'quality': None, 'vcodec': 'none', 'audio_ext': 'mp4', 'video_ext': 'none', }, { 'format_id': '41', 'format_index': None, 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear0/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', 'tbr': 41.457, 'ext': 'mp4', 'fps': None, 'protocol': 'm3u8_native', 'preference': None, 'quality': None, 'vcodec': 'none', 'acodec': 'mp4a.40.2', 'audio_ext': 'mp4', 'video_ext': 'none', 'abr': 41.457, }, { 'format_id': '263', 'format_index': None, 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear1/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', 'tbr': 263.851, 'ext': 'mp4', 'fps': None, 'protocol': 'm3u8_native', 'preference': None, 'quality': None, 'width': 416, 'height': 234, 'vcodec': 'avc1.4d400d', 'acodec': 'mp4a.40.2', 'video_ext': 'mp4', 'audio_ext': 'none', }, { 'format_id': '577', 'format_index': None, 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear2/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', 'tbr': 577.61, 'ext': 'mp4', 'fps': None, 'protocol': 'm3u8_native', 'preference': None, 'quality': None, 'width': 640, 'height': 360, 'vcodec': 'avc1.4d401e', 'acodec': 'mp4a.40.2', 'video_ext': 'mp4', 'audio_ext': 'none', }, { 'format_id': '915', 'format_index': None, 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear3/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', 'tbr': 915.905, 'ext': 'mp4', 'fps': None, 'protocol': 'm3u8_native', 'preference': None, 
'quality': None, 'width': 960, 'height': 540, 'vcodec': 'avc1.4d401f', 'acodec': 'mp4a.40.2', 'video_ext': 'mp4', 'audio_ext': 'none', }, { 'format_id': '1030', 'format_index': None, 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear4/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', 'tbr': 1030.138, 'ext': 'mp4', 'fps': None, 'protocol': 'm3u8_native', 'preference': None, 'quality': None, 'width': 1280, 'height': 720, 'vcodec': 'avc1.4d401f', 'acodec': 'mp4a.40.2', 'video_ext': 'mp4', 'audio_ext': 'none', }, { 'format_id': '1924', 'format_index': None, 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/gear5/prog_index.m3u8', 'manifest_url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/bipbop_16x9_variant.m3u8', 'tbr': 1924.009, 'ext': 'mp4', 'fps': None, 'protocol': 'm3u8_native', 'preference': None, 'quality': None, 'width': 1920, 'height': 1080, 'vcodec': 'avc1.4d401f', 'acodec': 'mp4a.40.2', 'video_ext': 'mp4', 'audio_ext': 'none', }], { 'en': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native', }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/eng_forced/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native', }], 'fr': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native', }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/fra_forced/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native', }], 'es': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native', }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/spa_forced/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native', }], 'ja': [{ 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native', }, { 'url': 'https://devstreaming-cdn.apple.com/videos/streaming/examples/bipbop_16x9/subtitles/jpn_forced/prog_index.m3u8', 'ext': 'vtt', 'protocol': 'm3u8_native', }], }, ), ] for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES: with open(f'./test/testdata/m3u8/{m3u8_file}.m3u8', encoding='utf-8') as f: formats, subs = self.ie._parse_m3u8_formats_and_subtitles( f.read(), m3u8_url, ext='mp4') self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) expect_value(self, subs, expected_subs, None) def test_parse_mpd_formats(self): _TEST_CASES = [ ( # https://github.com/ytdl-org/youtube-dl/issues/13919 # Also tests duplicate representation ids, see # https://github.com/ytdl-org/youtube-dl/issues/15111 'float_duration', 'http://unknown/manifest.mpd', # mpd_url None, # mpd_base_url [{ 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'm4a', 'format_id': '318597', 'format_note': 'DASH audio', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'none', 'tbr': 61.587, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': '318597', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'none', 'vcodec': 'avc1.42001f', 'tbr': 318.597, 
'width': 340, 'height': 192, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': '638590', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'none', 'vcodec': 'avc1.42001f', 'tbr': 638.59, 'width': 512, 'height': 288, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': '1022565', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'none', 'vcodec': 'avc1.4d001f', 'tbr': 1022.565, 'width': 688, 'height': 384, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': '2046506', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'none', 'vcodec': 'avc1.4d001f', 'tbr': 2046.506, 'width': 1024, 'height': 576, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': '3998017', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'none', 'vcodec': 'avc1.640029', 'tbr': 3998.017, 'width': 1280, 'height': 720, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': '5997485', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'none', 'vcodec': 'avc1.640032', 'tbr': 5997.485, 'width': 1920, 'height': 1080, }], {}, ), ( # https://github.com/ytdl-org/youtube-dl/pull/14844 'urls_only', 'http://unknown/manifest.mpd', # mpd_url None, # mpd_base_url [{ 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': 'h264_aac_144p_m4s', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'avc3.42c01e', 'tbr': 200, 'width': 256, 'height': 144, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': 'h264_aac_240p_m4s', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'avc3.42c01e', 'tbr': 400, 'width': 424, 'height': 240, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': 'h264_aac_360p_m4s', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'avc3.42c01e', 'tbr': 800, 'width': 640, 'height': 360, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': 'h264_aac_480p_m4s', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'avc3.42c01e', 'tbr': 1200, 'width': 856, 'height': 480, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': 'h264_aac_576p_m4s', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'avc3.42c01e', 'tbr': 1600, 'width': 1024, 'height': 576, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': 'h264_aac_720p_m4s', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'avc3.42c01e', 'tbr': 2400, 'width': 1280, 'height': 720, }, { 'manifest_url': 'http://unknown/manifest.mpd', 'ext': 'mp4', 'format_id': 'h264_aac_1080p_m4s', 'format_note': 'DASH video', 'protocol': 'http_dash_segments', 'acodec': 'mp4a.40.2', 'vcodec': 'avc3.42c01e', 'tbr': 4400, 'width': 1920, 'height': 1080, }], {}, ), ( # https://github.com/ytdl-org/youtube-dl/issues/20346 # Media considered unfragmented even though it contains # Initialization tag 'unfragmented', 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', # mpd_url 'https://v.redd.it/hw1x7rcg7zl21', # mpd_base_url [{ 'url': 'https://v.redd.it/hw1x7rcg7zl21/audio', 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', 'ext': 
'm4a', 'format_id': 'AUDIO-1', 'format_note': 'DASH audio', 'container': 'm4a_dash', 'acodec': 'mp4a.40.2', 'vcodec': 'none', 'tbr': 129.87, 'asr': 48000, }, { 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_240', 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', 'ext': 'mp4', 'format_id': 'VIDEO-2', 'format_note': 'DASH video', 'container': 'mp4_dash', 'acodec': 'none', 'vcodec': 'avc1.4d401e', 'tbr': 608.0, 'width': 240, 'height': 240, 'fps': 30, }, { 'url': 'https://v.redd.it/hw1x7rcg7zl21/DASH_360', 'manifest_url': 'https://v.redd.it/hw1x7rcg7zl21/DASHPlaylist.mpd', 'ext': 'mp4', 'format_id': 'VIDEO-1', 'format_note': 'DASH video', 'container': 'mp4_dash', 'acodec': 'none', 'vcodec': 'avc1.4d401e', 'tbr': 804.261, 'width': 360, 'height': 360, 'fps': 30, }], {}, ), ( 'subtitles', 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/', [{ 'format_id': 'audio=128001', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'ext': 'm4a', 'tbr': 128.001, 'asr': 48000, 'format_note': 'DASH audio', 'container': 'm4a_dash', 'vcodec': 'none', 'acodec': 'mp4a.40.2', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', 'audio_ext': 'm4a', 'video_ext': 'none', 'abr': 128.001, }, { 'format_id': 'video=100000', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'ext': 'mp4', 'width': 336, 'height': 144, 'tbr': 100, 'format_note': 'DASH video', 'container': 'mp4_dash', 'vcodec': 'avc1.4D401F', 'acodec': 'none', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', 'video_ext': 'mp4', 'audio_ext': 'none', 'vbr': 100, }, { 'format_id': 'video=326000', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'ext': 'mp4', 'width': 562, 'height': 240, 'tbr': 326, 'format_note': 'DASH video', 'container': 'mp4_dash', 'vcodec': 'avc1.4D401F', 'acodec': 'none', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', 'video_ext': 'mp4', 'audio_ext': 'none', 'vbr': 326, }, { 'format_id': 'video=698000', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'ext': 'mp4', 'width': 844, 'height': 360, 'tbr': 698, 'format_note': 'DASH video', 'container': 'mp4_dash', 'vcodec': 'avc1.4D401F', 'acodec': 'none', 'url': 
'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', 'video_ext': 'mp4', 'audio_ext': 'none', 'vbr': 698, }, { 'format_id': 'video=1493000', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'ext': 'mp4', 'width': 1126, 'height': 480, 'tbr': 1493, 'format_note': 'DASH video', 'container': 'mp4_dash', 'vcodec': 'avc1.4D401F', 'acodec': 'none', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', 'video_ext': 'mp4', 'audio_ext': 'none', 'vbr': 1493, }, { 'format_id': 'video=4482000', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'ext': 'mp4', 'width': 1688, 'height': 720, 'tbr': 4482, 'format_note': 'DASH video', 'container': 'mp4_dash', 'vcodec': 'avc1.4D401F', 'acodec': 'none', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', 'video_ext': 'mp4', 'audio_ext': 'none', 'vbr': 4482, }], { 'en': [ { 'ext': 'mp4', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/manifest.mpd', 'fragment_base_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/dash/', 'protocol': 'http_dash_segments', }, ], }, ), ] for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES: with open(f'./test/testdata/mpd/{mpd_file}.mpd', encoding='utf-8') as f: formats, subtitles = self.ie._parse_mpd_formats_and_subtitles( compat_etree_fromstring(f.read().encode()), mpd_base_url=mpd_base_url, mpd_url=mpd_url) self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) expect_value(self, subtitles, expected_subtitles, None) def test_parse_ism_formats(self): _TEST_CASES = [ ( 'sintel', 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', [{ 'format_id': 'audio-128', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'ext': 'isma', 'tbr': 128, 'asr': 48000, 'vcodec': 'none', 'acodec': 'AACL', 'protocol': 'ism', 'audio_channels': 2, '_download_params': { 'stream_type': 'audio', 'duration': 8880746666, 'timescale': 10000000, 'width': 0, 'height': 0, 'fourcc': 'AACL', 'codec_private_data': '1190', 'sampling_rate': 48000, 'channels': 2, 'bits_per_sample': 16, 
'nal_unit_length_field': 4, }, }, { 'format_id': 'video-100', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'ext': 'ismv', 'width': 336, 'height': 144, 'tbr': 100, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', '_download_params': { 'stream_type': 'video', 'duration': 8880746666, 'timescale': 10000000, 'width': 336, 'height': 144, 'fourcc': 'AVC1', 'codec_private_data': '00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4, }, }, { 'format_id': 'video-326', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'ext': 'ismv', 'width': 562, 'height': 240, 'tbr': 326, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', '_download_params': { 'stream_type': 'video', 'duration': 8880746666, 'timescale': 10000000, 'width': 562, 'height': 240, 'fourcc': 'AVC1', 'codec_private_data': '00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4, }, }, { 'format_id': 'video-698', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'ext': 'ismv', 'width': 844, 'height': 360, 'tbr': 698, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', '_download_params': { 'stream_type': 'video', 'duration': 8880746666, 'timescale': 10000000, 'width': 844, 'height': 360, 'fourcc': 'AVC1', 'codec_private_data': '00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4, }, }, { 'format_id': 'video-1493', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'ext': 'ismv', 'width': 1126, 'height': 480, 'tbr': 1493, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', '_download_params': { 'stream_type': 'video', 'duration': 8880746666, 'timescale': 10000000, 'width': 1126, 'height': 480, 'fourcc': 'AVC1', 'codec_private_data': '00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4, }, }, { 'format_id': 'video-4482', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'ext': 'ismv', 'width': 1688, 'height': 720, 'tbr': 4482, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', '_download_params': { 'stream_type': 'video', 'duration': 8880746666, 'timescale': 10000000, 'width': 1688, 'height': 720, 'fourcc': 'AVC1', 'codec_private_data': 
'00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4, }, }], { 'eng': [ { 'ext': 'ismt', 'protocol': 'ism', 'url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', 'manifest_url': 'https://sdn-global-streaming-cache-3qsdn.akamaized.net/stream/3144/files/17/07/672975/3144-kZT4LWMQw6Rh7Kpd.ism/Manifest', '_download_params': { 'stream_type': 'text', 'duration': 8880746666, 'timescale': 10000000, 'fourcc': 'TTML', 'codec_private_data': '', }, }, ], }, ), ( 'ec-3_test', 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', [{ 'format_id': 'audio_deu-127', 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'ext': 'isma', 'tbr': 127, 'asr': 48000, 'vcodec': 'none', 'acodec': 'AACL', 'protocol': 'ism', 'language': 'deu', 'audio_channels': 2, '_download_params': { 'stream_type': 'audio', 'duration': 370000000, 'timescale': 10000000, 'width': 0, 'height': 0, 'fourcc': 'AACL', 'language': 'deu', 'codec_private_data': '1190', 'sampling_rate': 48000, 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4, }, }, { 'format_id': 'audio_deu_1-224', 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'ext': 'isma', 'tbr': 224, 'asr': 48000, 'vcodec': 'none', 'acodec': 'EC-3', 'protocol': 'ism', 'language': 'deu', 'audio_channels': 6, '_download_params': { 'stream_type': 'audio', 'duration': 370000000, 'timescale': 10000000, 'width': 0, 'height': 0, 'fourcc': 'EC-3', 'language': 'deu', 'codec_private_data': '00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00', 'sampling_rate': 48000, 'channels': 6, 'bits_per_sample': 16, 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-23', 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'ext': 'ismv', 'width': 384, 'height': 216, 'tbr': 23, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', 'language': 'deu', '_download_params': { 'stream_type': 'video', 'duration': 370000000, 'timescale': 10000000, 'width': 384, 'height': 216, 'fourcc': 'AVC1', 'language': 'deu', 'codec_private_data': '000000016742C00CDB06077E5C05A808080A00000300020000030009C0C02EE0177CC6300F142AE00000000168CA8DC8', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-403', 'url': 
'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'ext': 'ismv', 'width': 400, 'height': 224, 'tbr': 403, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', 'language': 'deu', '_download_params': { 'stream_type': 'video', 'duration': 370000000, 'timescale': 10000000, 'width': 400, 'height': 224, 'fourcc': 'AVC1', 'language': 'deu', 'codec_private_data': '00000001674D4014E98323B602D4040405000003000100000300320F1429380000000168EAECF2', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-680', 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'ext': 'ismv', 'width': 640, 'height': 360, 'tbr': 680, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', 'language': 'deu', '_download_params': { 'stream_type': 'video', 'duration': 370000000, 'timescale': 10000000, 'width': 640, 'height': 360, 'fourcc': 'AVC1', 'language': 'deu', 'codec_private_data': '00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-1253', 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'ext': 'ismv', 'width': 640, 'height': 360, 'tbr': 1253, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', 'vbr': 1253, 'language': 'deu', '_download_params': { 'stream_type': 'video', 'duration': 370000000, 'timescale': 10000000, 'width': 640, 'height': 360, 'fourcc': 'AVC1', 'language': 'deu', 'codec_private_data': '00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-2121', 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'ext': 'ismv', 'width': 768, 'height': 432, 'tbr': 2121, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', 'language': 'deu', '_download_params': { 'stream_type': 'video', 'duration': 370000000, 'timescale': 10000000, 'width': 768, 'height': 432, 'fourcc': 'AVC1', 'language': 'deu', 'codec_private_data': '00000001674D401EECA0601BD80B50101014000003000400000300C83C58B6580000000168E93B3C80', 'channels': 2, 'bits_per_sample': 16, 
'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-3275', 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'ext': 'ismv', 'width': 1280, 'height': 720, 'tbr': 3275, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', 'language': 'deu', '_download_params': { 'stream_type': 'video', 'duration': 370000000, 'timescale': 10000000, 'width': 1280, 'height': 720, 'fourcc': 'AVC1', 'language': 'deu', 'codec_private_data': '00000001674D4020ECA02802DD80B501010140000003004000000C83C60C65800000000168E93B3C80', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-5300', 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'ext': 'ismv', 'width': 1920, 'height': 1080, 'tbr': 5300, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', 'language': 'deu', '_download_params': { 'stream_type': 'video', 'duration': 370000000, 'timescale': 10000000, 'width': 1920, 'height': 1080, 'fourcc': 'AVC1', 'language': 'deu', 'codec_private_data': '00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4, }, }, { 'format_id': 'video_deu-8079', 'url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'manifest_url': 'https://smstr01.dmm.t-online.de/smooth24/smoothstream_m1/streaming/sony/9221438342941275747/636887760842957027/25_km_h-Trailer-9221571562372022953_deu_20_1300k_HD_H_264_ISMV.ism/Manifest', 'ext': 'ismv', 'width': 1920, 'height': 1080, 'tbr': 8079, 'vcodec': 'AVC1', 'acodec': 'none', 'protocol': 'ism', 'language': 'deu', '_download_params': { 'stream_type': 'video', 'duration': 370000000, 'timescale': 10000000, 'width': 1920, 'height': 1080, 'fourcc': 'AVC1', 'language': 'deu', 'codec_private_data': '00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80', 'channels': 2, 'bits_per_sample': 16, 'nal_unit_length_field': 4, }, }], {}, ), ] for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES: with open(f'./test/testdata/ism/{ism_file}.Manifest', encoding='utf-8') as f: formats, subtitles = self.ie._parse_ism_formats_and_subtitles( compat_etree_fromstring(f.read().encode()), ism_url=ism_url) self.ie._sort_formats(formats) expect_value(self, formats, expected_formats, None) expect_value(self, subtitles, expected_subtitles, None) def test_parse_f4m_formats(self): _TEST_CASES = [ ( # https://github.com/ytdl-org/youtube-dl/issues/14660 'custom_base_url', 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', [{ 'manifest_url': 'http://api.new.livestream.com/accounts/6115179/events/6764928/videos/144884262.f4m', 'ext': 'flv', 'format_id': '2148', 
                    'protocol': 'f4m',
                    'tbr': 2148,
                    'width': 1280,
                    'height': 720,
                }],
            ),
        ]

        for f4m_file, f4m_url, expected_formats in _TEST_CASES:
            with open(f'./test/testdata/f4m/{f4m_file}.f4m', encoding='utf-8') as f:
                formats = self.ie._parse_f4m_formats(
                    compat_etree_fromstring(f.read().encode()),
                    f4m_url, None)
                self.ie._sort_formats(formats)
                expect_value(self, formats, expected_formats, None)

    def test_parse_xspf(self):
        _TEST_CASES = [
            (
                'foo_xspf',
                'https://example.org/src/foo_xspf.xspf',
                [{
                    'id': 'foo_xspf',
                    'title': 'Pandemonium',
                    'description': 'Visit http://bigbrother404.bandcamp.com',
                    'duration': 202.416,
                    'formats': [{
                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
                        'url': 'https://example.org/src/cd1/track%201.mp3',
                    }],
                }, {
                    'id': 'foo_xspf',
                    'title': 'Final Cartridge (Nichico Twelve Remix)',
                    'description': 'Visit http://bigbrother404.bandcamp.com',
                    'duration': 255.857,
                    'formats': [{
                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
                        'url': 'https://example.org/%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3',
                    }],
                }, {
                    'id': 'foo_xspf',
                    'title': 'Rebuilding Nightingale',
                    'description': 'Visit http://bigbrother404.bandcamp.com',
                    'duration': 287.915,
                    'formats': [{
                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
                        'url': 'https://example.org/src/track3.mp3',
                    }, {
                        'manifest_url': 'https://example.org/src/foo_xspf.xspf',
                        'url': 'https://example.com/track3.mp3',
                    }],
                }],
            ),
        ]

        for xspf_file, xspf_url, expected_entries in _TEST_CASES:
            with open(f'./test/testdata/xspf/{xspf_file}.xspf', encoding='utf-8') as f:
                entries = self.ie._parse_xspf(
                    compat_etree_fromstring(f.read().encode()),
                    xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
                expect_value(self, entries, expected_entries, None)
                for i in range(len(entries)):
                    expect_dict(self, entries[i], expected_entries[i])

    def test_response_with_expected_status_returns_content(self):
        # Checks for mitigations against the effects of
        # <https://bugs.python.org/issue15002> that affect Python 3.4.1+, which
        # manifest as `_download_webpage`, `_download_xml`, `_download_json`,
        # or the underlying `_download_webpage_handle` returning no content
        # when a response matches `expected_status`.
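        # Editor's note: a 418 status would normally surface as an HTTP error;
        # passing expected_status below makes _download_webpage_handle accept
        # it, and the assertion checks the body is still returned, not dropped.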
        httpd = http.server.HTTPServer(
            ('127.0.0.1', 0), InfoExtractorTestRequestHandler)
        port = http_server_port(httpd)
        server_thread = threading.Thread(target=httpd.serve_forever)
        server_thread.daemon = True
        server_thread.start()

        (content, urlh) = self.ie._download_webpage_handle(
            f'http://127.0.0.1:{port}/teapot', None,
            expected_status=TEAPOT_RESPONSE_STATUS)
        self.assertEqual(content, TEAPOT_RESPONSE_BODY)

    def test_search_nextjs_data(self):
        data = '<script id="__NEXT_DATA__" type="application/json">{"props":{}}</script>'
        self.assertEqual(self.ie._search_nextjs_data(data, None), {'props': {}})
        self.assertEqual(self.ie._search_nextjs_data('', None, fatal=False), {})
        self.assertEqual(self.ie._search_nextjs_data('', None, default=None), None)
        self.assertEqual(self.ie._search_nextjs_data('', None, default={}), {})
        with self.assertWarns(DeprecationWarning):
            self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})


if __name__ == '__main__':
    unittest.main()
yt-dlp-2024.09.27/test/test_YoutubeDL.py
#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest
from unittest.mock import patch

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import contextlib
import copy
import json

from test.helper import FakeYDL, assertRegexpMatches, try_rm
from yt_dlp import YoutubeDL
from yt_dlp.compat import compat_os_name
from yt_dlp.extractor import YoutubeIE
from yt_dlp.extractor.common import InfoExtractor
from yt_dlp.postprocessor.common import PostProcessor
from yt_dlp.utils import (
    ExtractorError,
    LazyList,
    OnDemandPagedList,
    int_or_none,
    match_filter_func,
)
from yt_dlp.utils.traversal import traverse_obj

TEST_URL = 'http://localhost/sample.mp4'


class YDL(FakeYDL):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.downloaded_info_dicts = []
        self.msgs = []

    def process_info(self, info_dict):
        self.downloaded_info_dicts.append(info_dict.copy())

    def to_screen(self, msg, *args, **kwargs):
        self.msgs.append(msg)

    def dl(self, *args, **kwargs):
        assert False, 'Downloader must not be invoked for test_YoutubeDL'
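# Editor's note: YDL is a test double. process_info() only records a copy of
# what would have been downloaded and dl() is stubbed out with an assert, so
# the format-selection tests below run without any network access.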
def _make_result(formats, **kwargs):
    res = {
        'formats': formats,
        'id': 'testid',
        'title': 'testttitle',
        'extractor': 'testex',
        'extractor_key': 'TestEx',
        'webpage_url': 'http://example.com/watch?v=shenanigans',
    }
    res.update(**kwargs)
    return res


class TestFormatSelection(unittest.TestCase):
    def test_prefer_free_formats(self):
        # Same resolution => download webm
        ydl = YDL()
        ydl.params['prefer_free_formats'] = True
        formats = [
            {'ext': 'webm', 'height': 460, 'url': TEST_URL},
            {'ext': 'mp4', 'height': 460, 'url': TEST_URL},
        ]
        info_dict = _make_result(formats)
        ydl.sort_formats(info_dict)
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['ext'], 'webm')

        # Different resolution => download best quality (mp4)
        ydl = YDL()
        ydl.params['prefer_free_formats'] = True
        formats = [
            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
            {'ext': 'mp4', 'height': 1080, 'url': TEST_URL},
        ]
        info_dict['formats'] = formats
        ydl.sort_formats(info_dict)
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['ext'], 'mp4')

        # No prefer_free_formats => prefer mp4 and webm
        ydl = YDL()
        ydl.params['prefer_free_formats'] = False
        formats = [
            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
            {'ext': 'mp4', 'height': 720, 'url': TEST_URL},
            {'ext': 'flv', 'height': 720, 'url': TEST_URL},
        ]
        info_dict['formats'] = formats
        ydl.sort_formats(info_dict)
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['ext'], 'mp4')

        ydl = YDL()
        ydl.params['prefer_free_formats'] = False
        formats = [
            {'ext': 'flv', 'height': 720, 'url': TEST_URL},
            {'ext': 'webm', 'height': 720, 'url': TEST_URL},
        ]
        info_dict['formats'] = formats
        ydl.sort_formats(info_dict)
        ydl.process_ie_result(info_dict)
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['ext'], 'webm')

    def test_format_selection(self):
        formats = [
            {'format_id': '35', 'ext': 'mp4', 'preference': 0, 'url': TEST_URL},
            {'format_id': 'example-with-dashes', 'ext': 'webm', 'preference': 1, 'url': TEST_URL},
            {'format_id': '45', 'ext': 'webm', 'preference': 2, 'url': TEST_URL},
            {'format_id': '47', 'ext': 'webm', 'preference': 3, 'url': TEST_URL},
            {'format_id': '2', 'ext': 'flv', 'preference': 4, 'url': TEST_URL},
        ]
        info_dict = _make_result(formats)

        def test(inp, *expected, multi=False):
            ydl = YDL({
                'format': inp,
                'allow_multiple_video_streams': multi,
                'allow_multiple_audio_streams': multi,
            })
            ydl.process_ie_result(info_dict.copy())
            downloaded = [x['format_id'] for x in ydl.downloaded_info_dicts]
            self.assertEqual(downloaded, list(expected))
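        # Editor's note on the syntax exercised below: '/' is ordered fallback
        # ('20/47' means format 20 if available, else 47), 'all' selects every
        # format, and 'mergeall' muxes them together (hence multi=True above).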
        test('20/47', '47')
        test('20/71/worst', '35')
        test(None, '2')
        test('webm/mp4', '47')
        test('3gp/40/mp4', '35')
        test('example-with-dashes', 'example-with-dashes')
        test('all', '2', '47', '45', 'example-with-dashes', '35')
        test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
        # See: https://github.com/yt-dlp/yt-dlp/pulls/8797
        test('7_a/worst', '35')

    def test_format_selection_audio(self):
        formats = [
            {'format_id': 'audio-low', 'ext': 'webm', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL},
            {'format_id': 'audio-mid', 'ext': 'webm', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL},
            {'format_id': 'audio-high', 'ext': 'flv', 'preference': 3, 'vcodec': 'none', 'url': TEST_URL},
            {'format_id': 'vid', 'ext': 'mp4', 'preference': 4, 'url': TEST_URL},
        ]
        info_dict = _make_result(formats)

        ydl = YDL({'format': 'bestaudio'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'audio-high')

        ydl = YDL({'format': 'worstaudio'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'audio-low')

        formats = [
            {'format_id': 'vid-low', 'ext': 'mp4', 'preference': 1, 'url': TEST_URL},
            {'format_id': 'vid-high', 'ext': 'mp4', 'preference': 2, 'url': TEST_URL},
        ]
        info_dict = _make_result(formats)

        ydl = YDL({'format': 'bestaudio/worstaudio/best'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'vid-high')

    def test_format_selection_audio_exts(self):
        formats = [
            {'format_id': 'mp3-64', 'ext': 'mp3', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
            {'format_id': 'ogg-64', 'ext': 'ogg', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
            {'format_id': 'aac-64', 'ext': 'aac', 'abr': 64, 'url': 'http://_', 'vcodec': 'none'},
            {'format_id': 'mp3-32', 'ext': 'mp3', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
            {'format_id': 'aac-32', 'ext': 'aac', 'abr': 32, 'url': 'http://_', 'vcodec': 'none'},
        ]

        info_dict = _make_result(formats)
        ydl = YDL({'format': 'best', 'format_sort': ['abr', 'ext']})
        ydl.sort_formats(info_dict)
        ydl.process_ie_result(copy.deepcopy(info_dict))
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'aac-64')

        ydl = YDL({'format': 'mp3'})
        ydl.sort_formats(info_dict)
        ydl.process_ie_result(copy.deepcopy(info_dict))
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'mp3-64')

        ydl = YDL({'prefer_free_formats': True, 'format_sort': ['abr', 'ext']})
        ydl.sort_formats(info_dict)
        ydl.process_ie_result(copy.deepcopy(info_dict))
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'ogg-64')

    def test_format_selection_video(self):
        formats = [
            {'format_id': 'dash-video-low', 'ext': 'mp4', 'preference': 1, 'acodec': 'none', 'url': TEST_URL},
            {'format_id': 'dash-video-high', 'ext': 'mp4', 'preference': 2, 'acodec': 'none', 'url': TEST_URL},
            {'format_id': 'vid', 'ext': 'mp4', 'preference': 3, 'url': TEST_URL},
        ]
        info_dict = _make_result(formats)

        ydl = YDL({'format': 'bestvideo'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'dash-video-high')

        ydl = YDL({'format': 'worstvideo'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'dash-video-low')

        ydl = YDL({'format': 'bestvideo[format_id^=dash][format_id$=low]'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'dash-video-low')

        formats = [
            {'format_id': 'vid-vcodec-dot', 'ext': 'mp4', 'preference': 1, 'vcodec': 'avc1.123456', 'acodec': 'none', 'url': TEST_URL},
        ]
        info_dict = _make_result(formats)

        ydl = YDL({'format': 'bestvideo[vcodec=avc1.123456]'})
        ydl.process_ie_result(info_dict.copy())
        downloaded = ydl.downloaded_info_dicts[0]
        self.assertEqual(downloaded['format_id'], 'vid-vcodec-dot')
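    # Editor's note: in format_sort, 'vcodec:NAME' makes NAME the preferred
    # codec; 'vp9.2' refers to VP9 Profile 2 (the HDR-capable profile), which
    # is why it matches the vp09.02.* format below and not the vp09.00.* one.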
ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'vp9-sdr-format') def test_format_selection_string_ops(self): formats = [ {'format_id': 'abc-cba', 'ext': 'mp4', 'url': TEST_URL}, {'format_id': 'zxc-cxz', 'ext': 'webm', 'url': TEST_URL}, ] info_dict = _make_result(formats) # equals (=) ydl = YDL({'format': '[format_id=abc-cba]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'abc-cba') # does not equal (!=) ydl = YDL({'format': '[format_id!=abc-cba]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'zxc-cxz') ydl = YDL({'format': '[format_id!=abc-cba][format_id!=zxc-cxz]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) # starts with (^=) ydl = YDL({'format': '[format_id^=abc]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'abc-cba') # does not start with (!^=) ydl = YDL({'format': '[format_id!^=abc]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'zxc-cxz') ydl = YDL({'format': '[format_id!^=abc][format_id!^=zxc]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) # ends with ($=) ydl = YDL({'format': '[format_id$=cba]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'abc-cba') # does not end with (!$=) ydl = YDL({'format': '[format_id!$=cba]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'zxc-cxz') ydl = YDL({'format': '[format_id!$=cba][format_id!$=cxz]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) # contains (*=) ydl = YDL({'format': '[format_id*=bc-cb]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'abc-cba') # does not contain (!*=) ydl = YDL({'format': '[format_id!*=bc-cb]'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'zxc-cxz') ydl = YDL({'format': '[format_id!*=abc][format_id!*=zxc]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) ydl = YDL({'format': '[format_id!*=-]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) def test_youtube_format_selection(self): # FIXME: Rewrite in accordance with the new format sorting options return order = [ '38', '37', '46', '22', '45', '35', '44', '18', '34', '43', '6', '5', '17', '36', '13', # Apple HTTP Live Streaming '96', '95', '94', '93', '92', '132', '151', # 3D '85', '84', '102', '83', '101', '82', '100', # Dash video '137', '248', '136', '247', '135', '246', '245', '244', '134', '243', '133', '242', '160', # Dash audio '141', '172', '140', '171', '139', ] def format_info(f_id): info = YoutubeIE._formats[f_id].copy() # XXX: In real cases InfoExtractor._parse_mpd_formats() fills up 'acodec' # and 'vcodec', while in tests such information is incomplete since # commit a6c2c24479e5f4827ceb06f64d855329c0a6f593 # test_YoutubeDL.test_youtube_format_selection is broken without # this fix if 'acodec' in info and 'vcodec' not in info: info['vcodec'] = 'none' elif 'vcodec' in info and 'acodec' not in info: info['acodec'] = 'none' info['format_id'] = f_id info['url'] = 
'url:' + f_id return info formats_order = [format_info(f_id) for f_id in order] info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': 'bestvideo+bestaudio'}) ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], '248+172') self.assertEqual(downloaded['ext'], 'mp4') info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': 'bestvideo[height>=999999]+bestaudio/best'}) ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], '38') info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': 'bestvideo/best,bestaudio'}) ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] self.assertEqual(downloaded_ids, ['137', '141']) info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])+bestaudio'}) ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] self.assertEqual(downloaded_ids, ['137+141', '248+141']) info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': '(bestvideo[ext=mp4],bestvideo[ext=webm])[height<=720]+bestaudio'}) ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] self.assertEqual(downloaded_ids, ['136+141', '247+141']) info_dict = _make_result(list(formats_order), extractor='youtube') ydl = YDL({'format': '(bestvideo[ext=none]/bestvideo[ext=webm])+bestaudio'}) ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] self.assertEqual(downloaded_ids, ['248+141']) for f1, f2 in zip(formats_order, formats_order[1:]): info_dict = _make_result([f1, f2], extractor='youtube') ydl = YDL({'format': 'best/bestvideo'}) ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], f1['format_id']) info_dict = _make_result([f2, f1], extractor='youtube') ydl = YDL({'format': 'best/bestvideo'}) ydl.sort_formats(info_dict) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], f1['format_id']) def test_audio_only_extractor_format_selection(self): # For extractors with incomplete formats (all formats are audio-only or # video-only) best and worst should fallback to corresponding best/worst # video-only or audio-only formats (as per # https://github.com/ytdl-org/youtube-dl/pull/5556) formats = [ {'format_id': 'low', 'ext': 'mp3', 'preference': 1, 'vcodec': 'none', 'url': TEST_URL}, {'format_id': 'high', 'ext': 'mp3', 'preference': 2, 'vcodec': 'none', 'url': TEST_URL}, ] info_dict = _make_result(formats) ydl = YDL({'format': 'best'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'high') ydl = YDL({'format': 'worst'}) ydl.process_ie_result(info_dict.copy()) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'low') def test_format_not_available(self): formats = [ {'format_id': 'regular', 'ext': 'mp4', 'height': 360, 'url': TEST_URL}, {'format_id': 'video', 'ext': 
'mp4', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, ] info_dict = _make_result(formats) # This must fail since complete video-audio format does not match filter # and extractor does not provide incomplete only formats (i.e. only # video-only or audio-only). ydl = YDL({'format': 'best[height>360]'}) self.assertRaises(ExtractorError, ydl.process_ie_result, info_dict.copy()) def test_format_selection_issue_10083(self): # See https://github.com/ytdl-org/youtube-dl/issues/10083 formats = [ {'format_id': 'regular', 'height': 360, 'url': TEST_URL}, {'format_id': 'video', 'height': 720, 'acodec': 'none', 'url': TEST_URL}, {'format_id': 'audio', 'vcodec': 'none', 'url': TEST_URL}, ] info_dict = _make_result(formats) ydl = YDL({'format': 'best[height>360]/bestvideo[height>360]+bestaudio'}) ydl.process_ie_result(info_dict.copy()) self.assertEqual(ydl.downloaded_info_dicts[0]['format_id'], 'video+audio') def test_invalid_format_specs(self): def assert_syntax_error(format_spec): self.assertRaises(SyntaxError, YDL, {'format': format_spec}) assert_syntax_error('bestvideo,,best') assert_syntax_error('+bestaudio') assert_syntax_error('bestvideo+') assert_syntax_error('/') assert_syntax_error('[720<height]') def test_format_filtering(self): formats = [ {'format_id': 'A', 'filesize': 500, 'width': 1000}, {'format_id': 'B', 'filesize': 1000, 'width': 500}, {'format_id': 'C', 'filesize': 1000, 'width': 400}, {'format_id': 'D', 'filesize': 2000, 'width': 600}, {'format_id': 'E', 'filesize': 3000}, {'format_id': 'F'}, {'format_id': 'G', 'filesize': 1000000}, ] for f in formats: f['url'] = 'http://_/' f['ext'] = 'unknown' info_dict = _make_result(formats, _format_sort_fields=('id', )) ydl = YDL({'format': 'best[filesize<3000]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'D') ydl = YDL({'format': 'best[filesize<=3000]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'E') ydl = YDL({'format': 'best[filesize <= ? 
3000]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'F') ydl = YDL({'format': 'best [filesize = 1000] [width>450]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'B') ydl = YDL({'format': 'best [filesize = 1000] [width!=450]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'C') ydl = YDL({'format': '[filesize>?1]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'G') ydl = YDL({'format': '[filesize<1M]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'E') ydl = YDL({'format': '[filesize<1MiB]'}) ydl.process_ie_result(info_dict) downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['format_id'], 'G') ydl = YDL({'format': 'all[width>=400][width<=600]'}) ydl.process_ie_result(info_dict) downloaded_ids = [info['format_id'] for info in ydl.downloaded_info_dicts] self.assertEqual(downloaded_ids, ['D', 'C', 'B']) ydl = YDL({'format': 'best[height<40]'}) with contextlib.suppress(ExtractorError): ydl.process_ie_result(info_dict) self.assertEqual(ydl.downloaded_info_dicts, []) @patch('yt_dlp.postprocessor.ffmpeg.FFmpegMergerPP.available', False) def test_default_format_spec_without_ffmpeg(self): ydl = YDL({}) self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') ydl = YDL({'simulate': True}) self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') ydl = YDL({}) self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') ydl = YDL({'simulate': True}) self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') ydl = YDL({'outtmpl': '-'}) self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') ydl = YDL({}) self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') @patch('yt_dlp.postprocessor.ffmpeg.FFmpegMergerPP.available', True) @patch('yt_dlp.postprocessor.ffmpeg.FFmpegMergerPP.can_merge', lambda _: True) def test_default_format_spec_with_ffmpeg(self): ydl = YDL({}) self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best') ydl = YDL({'simulate': True}) self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best') ydl = YDL({}) self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') ydl = YDL({'simulate': True}) self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') ydl = YDL({'outtmpl': '-'}) self.assertEqual(ydl._default_format_spec({}), 'best/bestvideo+bestaudio') ydl = YDL({}) self.assertEqual(ydl._default_format_spec({}), 'bestvideo*+bestaudio/best') self.assertEqual(ydl._default_format_spec({'is_live': True}), 'best/bestvideo+bestaudio') class TestYoutubeDL(unittest.TestCase): def test_subtitles(self): def s_formats(lang, autocaption=False): return [{ 'ext': ext, 'url': f'http://localhost/video.{lang}.{ext}', '_auto': autocaption, } for ext in ['vtt', 'srt', 'ass']] subtitles = {l: s_formats(l) for l in ['en', 'fr', 'es']} auto_captions = {l: s_formats(l, True) for l in ['it', 'pt', 'es']} info_dict = { 'id': 'test', 'title': 'Test', 'url': 'http://localhost/video.mp4', 'subtitles': subtitles, 
'automatic_captions': auto_captions, 'extractor': 'TEST', 'webpage_url': 'http://example.com/watch?v=shenanigans', } def get_info(params={}): params.setdefault('simulate', True) ydl = YDL(params) ydl.report_warning = lambda *args, **kargs: None return ydl.process_video_result(info_dict, download=False) result = get_info() self.assertFalse(result.get('requested_subtitles')) self.assertEqual(result['subtitles'], subtitles) self.assertEqual(result['automatic_captions'], auto_captions) result = get_info({'writesubtitles': True}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'en'}) self.assertTrue(subs['en'].get('data') is None) self.assertEqual(subs['en']['ext'], 'ass') result = get_info({'writesubtitles': True, 'subtitlesformat': 'foo/srt'}) subs = result['requested_subtitles'] self.assertEqual(subs['en']['ext'], 'srt') result = get_info({'writesubtitles': True, 'subtitleslangs': ['es', 'fr', 'it']}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'es', 'fr'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['all', '-en']}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'es', 'fr'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['en', 'fr', '-en']}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'fr'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['-en', 'en']}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'en'}) result = get_info({'writesubtitles': True, 'subtitleslangs': ['e.+']}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'es', 'en'}) result = get_info({'writesubtitles': True, 'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'es', 'pt'}) self.assertFalse(subs['es']['_auto']) self.assertTrue(subs['pt']['_auto']) result = get_info({'writeautomaticsub': True, 'subtitleslangs': ['es', 'pt']}) subs = result['requested_subtitles'] self.assertTrue(subs) self.assertEqual(set(subs.keys()), {'es', 'pt'}) self.assertTrue(subs['es']['_auto']) self.assertTrue(subs['pt']['_auto']) def test_add_extra_info(self): test_dict = { 'extractor': 'Foo', } extra_info = { 'extractor': 'Bar', 'playlist': 'funny videos', } YDL.add_extra_info(test_dict, extra_info) self.assertEqual(test_dict['extractor'], 'Foo') self.assertEqual(test_dict['playlist'], 'funny videos') outtmpl_info = { 'id': '1234', 'ext': 'mp4', 'width': None, 'height': 1080, 'filesize': 1024, 'title1': '$PATH', 'title2': '%PATH%', 'title3': 'foo/bar\\test', 'title4': 'foo "bar" test', 'title5': 'áéí 𝐀', 'timestamp': 1618488000, 'duration': 100000, 'playlist_index': 1, 'playlist_autonumber': 2, '__last_playlist_index': 100, 'n_entries': 10, 'formats': [ {'id': 'id 1', 'height': 1080, 'width': 1920}, {'id': 'id 2', 'height': 720}, {'id': 'id 3'}, ], } def test_prepare_outtmpl_and_filename(self): def test(tmpl, expected, *, info=None, **params): params['outtmpl'] = tmpl ydl = FakeYDL(params) ydl._num_downloads = 1 self.assertEqual(ydl.validate_outtmpl(tmpl), None) out = ydl.evaluate_outtmpl(tmpl, info or self.outtmpl_info) fname = ydl.prepare_filename(info or self.outtmpl_info) if not isinstance(expected, (list, tuple)): expected = (expected, expected) for (name, got), expect in zip((('outtmpl', out), ('filename', 
fname)), expected): if callable(expect): self.assertTrue(expect(got), f'Wrong {name} from {tmpl}') elif expect is not None: self.assertEqual(got, expect, f'Wrong {name} from {tmpl}') # Side-effects original_infodict = dict(self.outtmpl_info) test('foo.bar', 'foo.bar') original_infodict['epoch'] = self.outtmpl_info.get('epoch') self.assertTrue(isinstance(original_infodict['epoch'], int)) test('%(epoch)d', int_or_none) self.assertEqual(original_infodict, self.outtmpl_info) # Auto-generated fields test('%(id)s.%(ext)s', '1234.mp4') test('%(duration_string)s', ('27:46:40', '27-46-40')) test('%(resolution)s', '1080p') test('%(playlist_index|)s', '001') test('%(playlist_index&{}!)s', '1!') test('%(playlist_autonumber)s', '02') test('%(autonumber)s', '00001') test('%(autonumber+2)03d', '005', autonumber_start=3) test('%(autonumber)s', '001', autonumber_size=3) # Escaping % test('%', '%') test('%%', '%') test('%%%%', '%%') test('%s', '%s') test('%%%s', '%%s') test('%d', '%d') test('%abc%', '%abc%') test('%%(width)06d.%(ext)s', '%(width)06d.mp4') test('%%%(height)s', '%1080') test('%(width)06d.%(ext)s', 'NA.mp4') test('%(width)06d.%%(ext)s', 'NA.%(ext)s') test('%%(width)06d.%(ext)s', '%(width)06d.mp4') # ID sanitization test('%(id)s', '_abcd', info={'id': '_abcd'}) test('%(some_id)s', '_abcd', info={'some_id': '_abcd'}) test('%(formats.0.id)s', '_abcd', info={'formats': [{'id': '_abcd'}]}) test('%(id)s', '-abcd', info={'id': '-abcd'}) test('%(id)s', '.abcd', info={'id': '.abcd'}) test('%(id)s', 'ab__cd', info={'id': 'ab__cd'}) test('%(id)s', ('ab:cd', 'ab:cd'), info={'id': 'ab:cd'}) test('%(id.0)s', '-', info={'id': '--'}) # Invalid templates self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError)) test('%(invalid@tmpl|def)s', 'none', outtmpl_na_placeholder='none') test('%(..)s', 'NA') test('%(formats.{id)s', 'NA') # Entire info_dict def expect_same_infodict(out): got_dict = json.loads(out) for info_field, expected in self.outtmpl_info.items(): self.assertEqual(got_dict.get(info_field), expected, info_field) return True test('%()j', (expect_same_infodict, None)) # NA placeholder NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s' test(NA_TEST_OUTTMPL, 'NA-NA-def-1234.mp4') test(NA_TEST_OUTTMPL, 'none-none-def-1234.mp4', outtmpl_na_placeholder='none') test(NA_TEST_OUTTMPL, '--def-1234.mp4', outtmpl_na_placeholder='') test('%(non_existent.0)s', 'NA') # String formatting FMT_TEST_OUTTMPL = '%%(height)%s.%%(ext)s' test(FMT_TEST_OUTTMPL % 's', '1080.mp4') test(FMT_TEST_OUTTMPL % 'd', '1080.mp4') test(FMT_TEST_OUTTMPL % '6d', ' 1080.mp4') test(FMT_TEST_OUTTMPL % '-6d', '1080 .mp4') test(FMT_TEST_OUTTMPL % '06d', '001080.mp4') test(FMT_TEST_OUTTMPL % ' 06d', ' 01080.mp4') test(FMT_TEST_OUTTMPL % ' 06d', ' 01080.mp4') test(FMT_TEST_OUTTMPL % '0 6d', ' 01080.mp4') test(FMT_TEST_OUTTMPL % '0 6d', ' 01080.mp4') test(FMT_TEST_OUTTMPL % ' 0 6d', ' 01080.mp4') # Type casting test('%(id)d', '1234') test('%(height)c', '1') test('%(ext)c', 'm') test('%(id)d %(id)r', "1234 '1234'") test('%(id)r %(height)r', "'1234' 1080") test('%(title5)a %(height)a', (R"'\xe1\xe9\xed \U0001d400' 1080", None)) test('%(ext)s-%(ext|def)d', 'mp4-def') test('%(width|0)04d', '0') test('a%(width|b)d', 'ab', outtmpl_na_placeholder='none') FORMATS = self.outtmpl_info['formats'] # Custom type casting test('%(formats.:.id)l', 'id 1, id 2, id 3') test('%(formats.:.id)#l', ('id 1\nid 2\nid 3', 'id 1 id 2 id 3')) test('%(ext)l', 'mp4') test('%(formats.:.id) 18l', ' id 1, id 2, id 3') 
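# Illustrative sketch (not executed by this suite): the list-formatting
# conversions above are also reachable through the public API. With a made-up
# info dict, something like
#     from yt_dlp import YoutubeDL
#     YoutubeDL().evaluate_outtmpl('%(formats.:.id)l', {'formats': [{'id': 'a'}, {'id': 'b'}]})
# would render 'a, b', i.e. each element's 'id' joined with ', '.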
test('%(formats)j', (json.dumps(FORMATS), None)) test('%(formats)#j', ( json.dumps(FORMATS, indent=4), json.dumps(FORMATS, indent=4).replace(':', ':').replace('"', '"').replace('\n', ' '), )) test('%(title5).3B', 'á') test('%(title5)U', 'áéí 𝐀') test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀') test('%(title5)+U', 'áéí A') test('%(title5)+#U', 'a\u0301e\u0301i\u0301 A') test('%(height)D', '1k') test('%(filesize)#D', '1Ki') test('%(height)5.2D', ' 1.08k') test('%(title4)#S', 'foo_bar_test') test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if compat_os_name == 'nt' else ' '))) if compat_os_name == 'nt': test('%(title4)q', ('"foo ""bar"" test"', None)) test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None)) test('%(formats.0.id)#q', ('"id 1"', None)) else: test('%(title4)q', ('\'foo "bar" test\'', '\'foo "bar" test\'')) test('%(formats.:.id)#q', "'id 1' 'id 2' 'id 3'") test('%(formats.0.id)#q', "'id 1'") # Internal formatting test('%(timestamp-1000>%H-%M-%S)s', '11-43-20') test('%(title|%)s %(title|%%)s', '% %%') test('%(id+1-height+3)05d', '00158') test('%(width+100)05d', 'NA') test('%(filesize*8)d', '8192') test('%(formats.0) 15s', ('% 15s' % FORMATS[0], None)) test('%(formats.0)r', (repr(FORMATS[0]), None)) test('%(height.0)03d', '001') test('%(-height.0)04d', '-001') test('%(formats.-1.id)s', FORMATS[-1]['id']) test('%(formats.0.id.-1)d', FORMATS[0]['id'][-1]) test('%(formats.3)s', 'NA') test('%(formats.:2:-1)r', repr(FORMATS[:2:-1])) test('%(formats.0.id.-1+id)f', '1235.000000') test('%(formats.0.id.-1+formats.1.id.-1)d', '3') out = json.dumps([{'id': f['id'], 'height.:2': str(f['height'])[:2]} if 'height' in f else {'id': f['id']} for f in FORMATS]) test('%(formats.:.{id,height.:2})j', (out, None)) test('%(formats.:.{id,height}.id)l', ', '.join(f['id'] for f in FORMATS)) test('%(.{id,title})j', ('{"id": "1234"}', '{"id": "1234"}')) # Alternates test('%(title,id)s', '1234') test('%(width-100,height+20|def)d', '1100') test('%(width-100,height+width|def)s', 'def') test('%(timestamp-x>%H\\,%M\\,%S,timestamp>%H\\,%M\\,%S)s', '12,00,00') # Replacement test('%(id&foo)s.bar', 'foo.bar') test('%(title&foo)s.bar', 'NA.bar') test('%(title&foo|baz)s.bar', 'baz.bar') test('%(x,id&foo|baz)s.bar', 'foo.bar') test('%(x,title&foo|baz)s.bar', 'baz.bar') test('%(id&a\nb|)s', ('a\nb', 'a b')) test('%(id&hi {:>10} {}|)s', 'hi 1234 1234') test(R'%(id&{0} {}|)s', 'NA') test(R'%(id&{0.1}|)s', 'NA') test('%(height&{:,d})S', '1,080') # Laziness def gen(): yield from range(5) raise self.assertTrue(False, 'LazyList should not be evaluated till here') test('%(key.4)s', '4', info={'key': LazyList(gen())}) # Empty filename test('%(foo|)s-%(bar|)s.%(ext)s', '-.mp4') # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4')) # FIXME: ? # test('%(foo|)s', ('', '_')) # FIXME: ? 
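# For reference (hypothetical invocation, not part of the tests): a template can
# be sanity-checked from the CLI without downloading anything, e.g.
#     yt-dlp -o '%(title)s [%(id)s].%(ext)s' --print filename URL
# which prints the rendered filename for the given URL and exits.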
# Environment variable expansion for prepare_filename os.environ['__yt_dlp_var'] = 'expanded' envvar = '%__yt_dlp_var%' if compat_os_name == 'nt' else '$__yt_dlp_var' test(envvar, (envvar, 'expanded')) if compat_os_name == 'nt': test('%s%', ('%s%', '%s%')) os.environ['s'] = 'expanded' test('%s%', ('%s%', 'expanded')) # %s% should be expanded before escaping %s os.environ['(test)s'] = 'expanded' test('%(test)s%', ('NA%', 'expanded')) # Environment should take priority over template # Path expansion and escaping test('Hello %(title1)s', 'Hello $PATH') test('Hello %(title2)s', 'Hello %PATH%') test('%(title3)s', ('foo/bar\\test', 'foo⧸bar⧹test')) test('folder/%(title3)s', ('folder/foo/bar\\test', f'folder{os.path.sep}foo⧸bar⧹test')) def test_format_note(self): ydl = YoutubeDL() self.assertEqual(ydl._format_note({}), '') assertRegexpMatches(self, ydl._format_note({ 'vbr': 10, }), r'^\s*10k$') assertRegexpMatches(self, ydl._format_note({ 'fps': 30, }), r'^30fps$') def test_postprocessors(self): filename = 'post-processor-testfile.mp4' audiofile = filename + '.mp3' class SimplePP(PostProcessor): def run(self, info): with open(audiofile, 'w') as f: f.write('EXAMPLE') return [info['filepath']], info def run_pp(params, pp): with open(filename, 'w') as f: f.write('EXAMPLE') ydl = YoutubeDL(params) ydl.add_post_processor(pp()) ydl.post_process(filename, {'filepath': filename}) run_pp({'keepvideo': True}, SimplePP) self.assertTrue(os.path.exists(filename), f'{filename} doesn\'t exist') self.assertTrue(os.path.exists(audiofile), f'{audiofile} doesn\'t exist') os.unlink(filename) os.unlink(audiofile) run_pp({'keepvideo': False}, SimplePP) self.assertFalse(os.path.exists(filename), f'{filename} exists') self.assertTrue(os.path.exists(audiofile), f'{audiofile} doesn\'t exist') os.unlink(audiofile) class ModifierPP(PostProcessor): def run(self, info): with open(info['filepath'], 'w') as f: f.write('MODIFIED') return [], info run_pp({'keepvideo': False}, ModifierPP) self.assertTrue(os.path.exists(filename), f'{filename} doesn\'t exist') os.unlink(filename) def test_match_filter(self): first = { 'id': '1', 'url': TEST_URL, 'title': 'one', 'extractor': 'TEST', 'duration': 30, 'filesize': 10 * 1024, 'playlist_id': '42', 'uploader': '變態妍字幕版 太妍 тест', 'creator': "тест ' 123 ' тест--", 'webpage_url': 'http://example.com/watch?v=shenanigans', } second = { 'id': '2', 'url': TEST_URL, 'title': 'two', 'extractor': 'TEST', 'duration': 10, 'description': 'foo', 'filesize': 5 * 1024, 'playlist_id': '43', 'uploader': 'тест 123', 'webpage_url': 'http://example.com/watch?v=SHENANIGANS', } videos = [first, second] def get_videos(filter_=None): ydl = YDL({'match_filter': filter_, 'simulate': True}) for v in videos: ydl.process_ie_result(v.copy(), download=True) return [v['id'] for v in ydl.downloaded_info_dicts] res = get_videos() self.assertEqual(res, ['1', '2']) def f(v, incomplete): if v['id'] == '1': return None else: return 'Video id is not 1' res = get_videos(f) self.assertEqual(res, ['1']) f = match_filter_func('duration < 30') res = get_videos(f) self.assertEqual(res, ['2']) f = match_filter_func('description = foo') res = get_videos(f) self.assertEqual(res, ['2']) f = match_filter_func('description =? 
foo') res = get_videos(f) self.assertEqual(res, ['1', '2']) f = match_filter_func('filesize > 5KiB') res = get_videos(f) self.assertEqual(res, ['1']) f = match_filter_func('playlist_id = 42') res = get_videos(f) self.assertEqual(res, ['1']) f = match_filter_func('uploader = "變態妍字幕版 太妍 тест"') res = get_videos(f) self.assertEqual(res, ['1']) f = match_filter_func('uploader != "變態妍字幕版 太妍 тест"') res = get_videos(f) self.assertEqual(res, ['2']) f = match_filter_func('creator = "тест \' 123 \' тест--"') res = get_videos(f) self.assertEqual(res, ['1']) f = match_filter_func("creator = 'тест \\' 123 \\' тест--'") res = get_videos(f) self.assertEqual(res, ['1']) f = match_filter_func(r"creator = 'тест \' 123 \' тест--' & duration > 30") res = get_videos(f) self.assertEqual(res, []) def test_playlist_items_selection(self): INDICES, PAGE_SIZE = list(range(1, 11)), 3 def entry(i, evaluated): evaluated.append(i) return { 'id': str(i), 'title': str(i), 'url': TEST_URL, } def pagedlist_entries(evaluated): def page_func(n): start = PAGE_SIZE * n for i in INDICES[start: start + PAGE_SIZE]: yield entry(i, evaluated) return OnDemandPagedList(page_func, PAGE_SIZE) def page_num(i): return (i + PAGE_SIZE - 1) // PAGE_SIZE def generator_entries(evaluated): for i in INDICES: yield entry(i, evaluated) def list_entries(evaluated): return list(generator_entries(evaluated)) def lazylist_entries(evaluated): return LazyList(generator_entries(evaluated)) def get_downloaded_info_dicts(params, entries): ydl = YDL(params) ydl.process_ie_result({ '_type': 'playlist', 'id': 'test', 'extractor': 'test:playlist', 'extractor_key': 'test:playlist', 'webpage_url': 'http://example.com', 'entries': entries, }) return ydl.downloaded_info_dicts def test_selection(params, expected_ids, evaluate_all=False): expected_ids = list(expected_ids) if evaluate_all: generator_eval = pagedlist_eval = INDICES elif not expected_ids: generator_eval = pagedlist_eval = [] else: generator_eval = INDICES[0: max(expected_ids)] pagedlist_eval = INDICES[PAGE_SIZE * page_num(min(expected_ids)) - PAGE_SIZE: PAGE_SIZE * page_num(max(expected_ids))] for name, func, expected_eval in ( ('list', list_entries, INDICES), ('Generator', generator_entries, generator_eval), # ('LazyList', lazylist_entries, generator_eval), # Generator and LazyList follow the exact same code path ('PagedList', pagedlist_entries, pagedlist_eval), ): evaluated = [] entries = func(evaluated) results = [(v['playlist_autonumber'] - 1, (int(v['id']), v['playlist_index'])) for v in get_downloaded_info_dicts(params, entries)] self.assertEqual(results, list(enumerate(zip(expected_ids, expected_ids))), f'Entries of {name} for {params}') self.assertEqual(sorted(evaluated), expected_eval, f'Evaluation of {name} for {params}') test_selection({}, INDICES) test_selection({'playlistend': 20}, INDICES, True) test_selection({'playlistend': 2}, INDICES[:2]) test_selection({'playliststart': 11}, [], True) test_selection({'playliststart': 2}, INDICES[1:]) test_selection({'playlist_items': '2-4'}, INDICES[1:4]) test_selection({'playlist_items': '2,4'}, [2, 4]) test_selection({'playlist_items': '20'}, [], True) test_selection({'playlist_items': '0'}, []) # Tests for https://github.com/ytdl-org/youtube-dl/issues/10591 test_selection({'playlist_items': '2-4,3-4,3'}, [2, 3, 4]) test_selection({'playlist_items': '4,2'}, [4, 2]) # Tests for https://github.com/yt-dlp/yt-dlp/issues/720 # https://github.com/yt-dlp/yt-dlp/issues/302 test_selection({'playlistreverse': True}, INDICES[::-1]) 
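# Reversal composes with explicit item selection; on the CLI (placeholder URL)
#     yt-dlp --playlist-reverse --playlist-items '2,4' PLAYLIST_URL
# downloads entry 4 and then entry 2, as the assertions just below spell out.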
test_selection({'playliststart': 2, 'playlistreverse': True}, INDICES[:0:-1]) test_selection({'playlist_items': '2,4', 'playlistreverse': True}, [4, 2]) test_selection({'playlist_items': '4,2'}, [4, 2]) # Tests for --playlist-items start:end:step test_selection({'playlist_items': ':'}, INDICES, True) test_selection({'playlist_items': '::1'}, INDICES, True) test_selection({'playlist_items': '::-1'}, INDICES[::-1], True) test_selection({'playlist_items': ':6'}, INDICES[:6]) test_selection({'playlist_items': ':-6'}, INDICES[:-5], True) test_selection({'playlist_items': '-1:6:-2'}, INDICES[:4:-2], True) test_selection({'playlist_items': '9:-6:-2'}, INDICES[8:3:-2], True) test_selection({'playlist_items': '1:inf:2'}, INDICES[::2], True) test_selection({'playlist_items': '-2:inf'}, INDICES[-2:], True) test_selection({'playlist_items': ':inf:-1'}, [], True) test_selection({'playlist_items': '0-2:2'}, [2]) test_selection({'playlist_items': '1-:2'}, INDICES[::2], True) test_selection({'playlist_items': '0--2:2'}, INDICES[1:-1:2], True) test_selection({'playlist_items': '10::3'}, [10], True) test_selection({'playlist_items': '-1::3'}, [10], True) test_selection({'playlist_items': '11::3'}, [], True) test_selection({'playlist_items': '-15::2'}, INDICES[1::2], True) test_selection({'playlist_items': '-15::15'}, [], True) def test_do_not_override_ie_key_in_url_transparent(self): ydl = YDL() class Foo1IE(InfoExtractor): _VALID_URL = r'foo1:' def _real_extract(self, url): return { '_type': 'url_transparent', 'url': 'foo2:', 'ie_key': 'Foo2', 'title': 'foo1 title', 'id': 'foo1_id', } class Foo2IE(InfoExtractor): _VALID_URL = r'foo2:' def _real_extract(self, url): return { '_type': 'url', 'url': 'foo3:', 'ie_key': 'Foo3', } class Foo3IE(InfoExtractor): _VALID_URL = r'foo3:' def _real_extract(self, url): return _make_result([{'url': TEST_URL}], title='foo3 title') ydl.add_info_extractor(Foo1IE(ydl)) ydl.add_info_extractor(Foo2IE(ydl)) ydl.add_info_extractor(Foo3IE(ydl)) ydl.extract_info('foo1:') downloaded = ydl.downloaded_info_dicts[0] self.assertEqual(downloaded['url'], TEST_URL) self.assertEqual(downloaded['title'], 'foo1 title') self.assertEqual(downloaded['id'], 'testid') self.assertEqual(downloaded['extractor'], 'testex') self.assertEqual(downloaded['extractor_key'], 'TestEx') # Test case for https://github.com/ytdl-org/youtube-dl/issues/27064 def test_ignoreerrors_for_playlist_with_url_transparent_iterable_entries(self): class _YDL(YDL): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) def trouble(self, s, tb=None): pass ydl = _YDL({ 'format': 'extra', 'ignoreerrors': True, }) class VideoIE(InfoExtractor): _VALID_URL = r'video:(?P<id>\d+)' def _real_extract(self, url): video_id = self._match_id(url) formats = [{ 'format_id': 'default', 'url': 'url:', }] if video_id == '0': raise ExtractorError('foo') if video_id == '2': formats.append({ 'format_id': 'extra', 'url': TEST_URL, }) return { 'id': video_id, 'title': f'Video {video_id}', 'formats': formats, } class PlaylistIE(InfoExtractor): _VALID_URL = r'playlist:' def _entries(self): for n in range(3): video_id = str(n) yield { '_type': 'url_transparent', 'ie_key': VideoIE.ie_key(), 'id': video_id, 'url': f'video:{video_id}', 'title': f'Video Transparent {video_id}', } def _real_extract(self, url): return self.playlist_result(self._entries()) ydl.add_info_extractor(VideoIE(ydl)) ydl.add_info_extractor(PlaylistIE(ydl)) info = ydl.extract_info('playlist:') entries = info['entries'] self.assertEqual(len(entries), 3) 
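# (With ignoreerrors=True, entry 0 fails extraction outright and entry 1 has no
# format matching 'extra', so both surface as None below while processing of the
# remaining entries continues.)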
self.assertTrue(entries[0] is None) self.assertTrue(entries[1] is None) self.assertEqual(len(ydl.downloaded_info_dicts), 1) downloaded = ydl.downloaded_info_dicts[0] entries[2].pop('requested_downloads', None) self.assertEqual(entries[2], downloaded) self.assertEqual(downloaded['url'], TEST_URL) self.assertEqual(downloaded['title'], 'Video Transparent 2') self.assertEqual(downloaded['id'], '2') self.assertEqual(downloaded['extractor'], 'Video') self.assertEqual(downloaded['extractor_key'], 'Video') def test_header_cookies(self): from http.cookiejar import Cookie ydl = FakeYDL() ydl.report_warning = lambda *_, **__: None def cookie(name, value, version=None, domain='', path='', secure=False, expires=None): return Cookie( version or 0, name, value, None, False, domain, bool(domain), bool(domain), path, bool(path), secure, expires, False, None, None, rest={}) _test_url = 'https://yt.dlp/test' def test(encoded_cookies, cookies, *, headers=False, round_trip=None, error_re=None): def _test(): ydl.cookiejar.clear() ydl._load_cookies(encoded_cookies, autoscope=headers) if headers: ydl._apply_header_cookies(_test_url) data = {'url': _test_url} ydl._calc_headers(data) self.assertCountEqual( map(vars, ydl.cookiejar), map(vars, cookies), 'Extracted cookiejar.Cookie is not the same') if not headers: self.assertEqual( data.get('cookies'), round_trip or encoded_cookies, 'Cookie is not the same as round trip') ydl.__dict__['_YoutubeDL__header_cookies'] = [] with self.subTest(msg=encoded_cookies): if not error_re: _test() return with self.assertRaisesRegex(Exception, error_re): _test() test('test=value; Domain=.yt.dlp', [cookie('test', 'value', domain='.yt.dlp')]) test('test=value', [cookie('test', 'value')], error_re=r'Unscoped cookies are not allowed') test('cookie1=value1; Domain=.yt.dlp; Path=/test; cookie2=value2; Domain=.yt.dlp; Path=/', [ cookie('cookie1', 'value1', domain='.yt.dlp', path='/test'), cookie('cookie2', 'value2', domain='.yt.dlp', path='/')]) test('test=value; Domain=.yt.dlp; Path=/test; Secure; Expires=9999999999', [ cookie('test', 'value', domain='.yt.dlp', path='/test', secure=True, expires=9999999999)]) test('test="value; "; path=/test; domain=.yt.dlp', [ cookie('test', 'value; ', domain='.yt.dlp', path='/test')], round_trip='test="value\\073 "; Domain=.yt.dlp; Path=/test') test('name=; Domain=.yt.dlp', [cookie('name', '', domain='.yt.dlp')], round_trip='name=""; Domain=.yt.dlp') test('test=value', [cookie('test', 'value', domain='.yt.dlp')], headers=True) test('cookie1=value; Domain=.yt.dlp; cookie2=value', [], headers=True, error_re=r'Invalid syntax') ydl.deprecated_feature = ydl.report_error test('test=value', [], headers=True, error_re=r'Passing cookies as a header is a potential security risk') def test_infojson_cookies(self): TEST_FILE = 'test_infojson_cookies.info.json' TEST_URL = 'https://example.com/example.mp4' COOKIES = 'a=b; Domain=.example.com; c=d; Domain=.example.com' COOKIE_HEADER = {'Cookie': 'a=b; c=d'} ydl = FakeYDL() ydl.process_info = lambda x: ydl._write_info_json('test', x, TEST_FILE) def make_info(info_header_cookies=False, fmts_header_cookies=False, cookies_field=False): fmt = {'url': TEST_URL} if fmts_header_cookies: fmt['http_headers'] = COOKIE_HEADER if cookies_field: fmt['cookies'] = COOKIES return _make_result([fmt], http_headers=COOKIE_HEADER if info_header_cookies else None) def test(initial_info, note): result = {} result['processed'] = ydl.process_ie_result(initial_info) self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL), msg=f'No cookies 
set in cookiejar after initial process when {note}') ydl.cookiejar.clear() with open(TEST_FILE) as infojson: result['loaded'] = ydl.sanitize_info(json.load(infojson), True) result['final'] = ydl.process_ie_result(result['loaded'].copy(), download=False) self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL), msg=f'No cookies set in cookiejar after final process when {note}') ydl.cookiejar.clear() for key in ('processed', 'loaded', 'final'): info = result[key] self.assertIsNone( traverse_obj(info, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False), msg=f'Cookie header not removed in {key} result when {note}') self.assertEqual( traverse_obj(info, ((None, ('formats', 0)), 'cookies'), get_all=False), COOKIES, msg=f'No cookies field found in {key} result when {note}') test({'url': TEST_URL, 'http_headers': COOKIE_HEADER, 'id': '1', 'title': 'x'}, 'no formats field') test(make_info(info_header_cookies=True), 'info_dict header cookies') test(make_info(fmts_header_cookies=True), 'format header cookies') test(make_info(info_header_cookies=True, fmts_header_cookies=True), 'info_dict and format header cookies') test(make_info(info_header_cookies=True, fmts_header_cookies=True, cookies_field=True), 'all cookies fields') test(make_info(cookies_field=True), 'cookies format field') test({'url': TEST_URL, 'cookies': COOKIES, 'id': '1', 'title': 'x'}, 'info_dict cookies field only') try_rm(TEST_FILE) def test_add_headers_cookie(self): def check_for_cookie_header(result): return traverse_obj(result, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False) ydl = FakeYDL({'http_headers': {'Cookie': 'a=b'}}) ydl._apply_header_cookies(_make_result([])['webpage_url']) # Scope to input webpage URL: .example.com fmt = {'url': 'https://example.com/video.mp4'} result = ydl.process_ie_result(_make_result([fmt]), download=False) self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies in result info_dict') self.assertEqual(result.get('cookies'), 'a=b; Domain=.example.com', msg='No cookies were set in cookies field') self.assertIn('a=b', ydl.cookiejar.get_cookie_header(fmt['url']), msg='No cookies were set in cookiejar') fmt = {'url': 'https://wrong.com/video.mp4'} result = ydl.process_ie_result(_make_result([fmt]), download=False) self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies for wrong domain') self.assertFalse(result.get('cookies'), msg='Cookies set in cookies field for wrong domain') self.assertFalse(ydl.cookiejar.get_cookie_header(fmt['url']), msg='Cookies set in cookiejar for wrong domain') if __name__ == '__main__': unittest.main()

yt-dlp-2024.09.27/test/test_YoutubeDLCookieJar.py

#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import re import tempfile from yt_dlp.cookies import YoutubeDLCookieJar class TestYoutubeDLCookieJar(unittest.TestCase): def test_keep_session_cookies(self): cookiejar 
= YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt') cookiejar.load() tf = tempfile.NamedTemporaryFile(delete=False) try: cookiejar.save(filename=tf.name) temp = tf.read().decode() self.assertTrue(re.search( r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpiresEmpty\s+YoutubeDLExpiresEmptyValue', temp)) self.assertTrue(re.search( r'www\.foobar\.foobar\s+FALSE\s+/\s+TRUE\s+0\s+YoutubeDLExpires0\s+YoutubeDLExpires0Value', temp)) finally: tf.close() os.remove(tf.name) def test_strip_httponly_prefix(self): cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') cookiejar.load() def assert_cookie_has_value(key): self.assertEqual(cookiejar._cookies['www.foobar.foobar']['/'][key].value, key + '_VALUE') assert_cookie_has_value('HTTPONLY_COOKIE') assert_cookie_has_value('JS_ACCESSIBLE_COOKIE') def test_malformed_cookies(self): cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/malformed_cookies.txt') cookiejar.load() # Cookies should be empty since all malformed cookie file entries # will be ignored self.assertFalse(cookiejar._cookies) def test_get_cookie_header(self): cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/httponly_cookies.txt') cookiejar.load() header = cookiejar.get_cookie_header('https://www.foobar.foobar') self.assertIn('HTTPONLY_COOKIE', header) def test_get_cookies_for_url(self): cookiejar = YoutubeDLCookieJar('./test/testdata/cookies/session_cookies.txt') cookiejar.load() cookies = cookiejar.get_cookies_for_url('https://www.foobar.foobar/') self.assertEqual(len(cookies), 2) cookies = cookiejar.get_cookies_for_url('https://foobar.foobar/') self.assertFalse(cookies) if __name__ == '__main__': unittest.main()

yt-dlp-2024.09.27/test/test_aes.py

#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import base64 from yt_dlp.aes import ( aes_cbc_decrypt, aes_cbc_decrypt_bytes, aes_cbc_encrypt, aes_ctr_decrypt, aes_ctr_encrypt, aes_decrypt, aes_decrypt_text, aes_ecb_decrypt, aes_ecb_encrypt, aes_encrypt, aes_gcm_decrypt_and_verify, aes_gcm_decrypt_and_verify_bytes, key_expansion, pad_block, ) from yt_dlp.dependencies import Cryptodome from yt_dlp.utils import bytes_to_intlist, intlist_to_bytes # the encrypted data can be generated with 'devscripts/generate_aes_testdata.py' class TestAES(unittest.TestCase): def setUp(self): self.key = self.iv = [0x20, 0x15] + 14 * [0] self.secret_msg = b'Secret message goes here' def test_encrypt(self): msg = b'message' key = list(range(16)) encrypted = aes_encrypt(bytes_to_intlist(msg), key) decrypted = intlist_to_bytes(aes_decrypt(encrypted, key)) self.assertEqual(decrypted, msg) def test_cbc_decrypt(self): data = b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\x27\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd' decrypted = 
intlist_to_bytes(aes_cbc_decrypt(bytes_to_intlist(data), self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) if Cryptodome.AES: decrypted = aes_cbc_decrypt_bytes(data, intlist_to_bytes(self.key), intlist_to_bytes(self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) def test_cbc_encrypt(self): data = bytes_to_intlist(self.secret_msg) encrypted = intlist_to_bytes(aes_cbc_encrypt(data, self.key, self.iv)) self.assertEqual( encrypted, b'\x97\x92+\xe5\x0b\xc3\x18\x91ky9m&\xb3\xb5@\xe6\'\xc2\x96.\xc8u\x88\xab9-[\x9e|\xf1\xcd') def test_ctr_decrypt(self): data = bytes_to_intlist(b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') decrypted = intlist_to_bytes(aes_ctr_decrypt(data, self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) def test_ctr_encrypt(self): data = bytes_to_intlist(self.secret_msg) encrypted = intlist_to_bytes(aes_ctr_encrypt(data, self.key, self.iv)) self.assertEqual( encrypted, b'\x03\xc7\xdd\xd4\x8e\xb3\xbc\x1a*O\xdc1\x12+8Aio\xd1z\xb5#\xaf\x08') def test_gcm_decrypt(self): data = b'\x159Y\xcf5eud\x90\x9c\x85&]\x14\x1d\x0f.\x08\xb4T\xe4/\x17\xbd' authentication_tag = b'\xe8&I\x80rI\x07\x9d}YWuU@:e' decrypted = intlist_to_bytes(aes_gcm_decrypt_and_verify( bytes_to_intlist(data), self.key, bytes_to_intlist(authentication_tag), self.iv[:12])) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) if Cryptodome.AES: decrypted = aes_gcm_decrypt_and_verify_bytes( data, intlist_to_bytes(self.key), authentication_tag, intlist_to_bytes(self.iv[:12])) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) def test_decrypt_text(self): password = intlist_to_bytes(self.key).decode() encrypted = base64.b64encode( intlist_to_bytes(self.iv[:8]) + b'\x17\x15\x93\xab\x8d\x80V\xcdV\xe0\t\xcdo\xc2\xa5\xd8ksM\r\xe27N\xae', ).decode() decrypted = (aes_decrypt_text(encrypted, password, 16)) self.assertEqual(decrypted, self.secret_msg) password = intlist_to_bytes(self.key).decode() encrypted = base64.b64encode( intlist_to_bytes(self.iv[:8]) + b'\x0b\xe6\xa4\xd9z\x0e\xb8\xb9\xd0\xd4i_\x85\x1d\x99\x98_\xe5\x80\xe7.\xbf\xa5\x83', ).decode() decrypted = (aes_decrypt_text(encrypted, password, 32)) self.assertEqual(decrypted, self.secret_msg) def test_ecb_encrypt(self): data = bytes_to_intlist(self.secret_msg) encrypted = intlist_to_bytes(aes_ecb_encrypt(data, self.key)) self.assertEqual( encrypted, b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') def test_ecb_decrypt(self): data = bytes_to_intlist(b'\xaa\x86]\x81\x97>\x02\x92\x9d\x1bR[[L/u\xd3&\xd1(h\xde{\x81\x94\xba\x02\xae\xbd\xa6\xd0:') decrypted = intlist_to_bytes(aes_ecb_decrypt(data, self.key, self.iv)) self.assertEqual(decrypted.rstrip(b'\x08'), self.secret_msg) def test_key_expansion(self): key = '4f6bdaa39e2f8cb07f5e722d9edef314' self.assertEqual(key_expansion(bytes_to_intlist(bytearray.fromhex(key))), [ 0x4F, 0x6B, 0xDA, 0xA3, 0x9E, 0x2F, 0x8C, 0xB0, 0x7F, 0x5E, 0x72, 0x2D, 0x9E, 0xDE, 0xF3, 0x14, 0x53, 0x66, 0x20, 0xA8, 0xCD, 0x49, 0xAC, 0x18, 0xB2, 0x17, 0xDE, 0x35, 0x2C, 0xC9, 0x2D, 0x21, 0x8C, 0xBE, 0xDD, 0xD9, 0x41, 0xF7, 0x71, 0xC1, 0xF3, 0xE0, 0xAF, 0xF4, 0xDF, 0x29, 0x82, 0xD5, 0x2D, 0xAD, 0xDE, 0x47, 0x6C, 0x5A, 0xAF, 0x86, 0x9F, 0xBA, 0x00, 0x72, 0x40, 0x93, 0x82, 0xA7, 0xF9, 0xBE, 0x82, 0x4E, 0x95, 0xE4, 0x2D, 0xC8, 0x0A, 0x5E, 0x2D, 0xBA, 0x4A, 0xCD, 0xAF, 0x1D, 0x54, 0xC7, 0x26, 0x98, 0xC1, 0x23, 0x0B, 0x50, 0xCB, 0x7D, 0x26, 0xEA, 0x81, 0xB0, 0x89, 0xF7, 0x93, 0x60, 0x4E, 0x94, 
0x52, 0x43, 0x45, 0xC4, 0x99, 0x3E, 0x63, 0x2E, 0x18, 0x8E, 0xEA, 0xD9, 0xCA, 0xE7, 0x7B, 0x39, 0x98, 0xA4, 0x3E, 0xFD, 0x01, 0x9A, 0x5D, 0xD3, 0x19, 0x14, 0xB7, 0x0A, 0xB0, 0x4E, 0x1C, 0xED, 0x28, 0xEA, 0x22, 0x10, 0x29, 0x70, 0x7F, 0xC3, 0x30, 0x64, 0xC8, 0xC9, 0xE8, 0xA6, 0xC1, 0xE9, 0xC0, 0x4C, 0xE3, 0xF9, 0xE9, 0x3C, 0x9C, 0x3A, 0xD9, 0x58, 0x54, 0xF3, 0xB4, 0x86, 0xCC, 0xDC, 0x74, 0xCA, 0x2F, 0x25, 0x9D, 0xF6, 0xB3, 0x1F, 0x44, 0xAE, 0xE7, 0xEC]) def test_pad_block(self): block = [0x21, 0xA0, 0x43, 0xFF] self.assertEqual(pad_block(block, 'pkcs7'), [*block, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C, 0x0C]) self.assertEqual(pad_block(block, 'iso7816'), [*block, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) self.assertEqual(pad_block(block, 'whitespace'), [*block, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20]) self.assertEqual(pad_block(block, 'zero'), [*block, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]) block = list(range(16)) for mode in ('pkcs7', 'iso7816', 'whitespace', 'zero'): self.assertEqual(pad_block(block, mode), block, mode) if __name__ == '__main__': unittest.main()

yt-dlp-2024.09.27/test/test_age_restriction.py

#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import is_download_test, try_rm from yt_dlp import YoutubeDL from yt_dlp.utils import DownloadError def _download_restricted(url, filename, age): """ Returns true if the file has been downloaded """ params = { 'age_limit': age, 'skip_download': True, 'writeinfojson': True, 'outtmpl': '%(id)s.%(ext)s', } ydl = YoutubeDL(params) ydl.add_default_info_extractors() json_filename = os.path.splitext(filename)[0] + '.info.json' try_rm(json_filename) try: ydl.download([url]) except DownloadError: pass else: return os.path.exists(json_filename) finally: try_rm(json_filename) @is_download_test class TestAgeRestriction(unittest.TestCase): def _assert_restricted(self, url, filename, age, old_age=None): self.assertTrue(_download_restricted(url, filename, old_age)) self.assertFalse(_download_restricted(url, filename, age)) def test_youtube(self): self._assert_restricted('HtVdAasjOgU', 'HtVdAasjOgU.mp4', 10) def test_youporn(self): self._assert_restricted( 'https://www.youporn.com/watch/16715086/sex-ed-in-detention-18-asmr/', '16715086.mp4', 2, old_age=25) if __name__ == '__main__': unittest.main()
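# Illustrative sketch (not part of the archive): the age_limit parameter that
# _download_restricted() exercises maps directly onto the public API; the URL
# below is a placeholder.
#     from yt_dlp import YoutubeDL
#     with YoutubeDL({'age_limit': 10, 'simulate': True}) as ydl:
#         ydl.download(['https://example.com/some-age-gated-video'])
# Videos whose declared age_limit exceeds the configured value are rejected
# instead of downloaded.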
yt-dlp-2024.09.27/test/test_all_urls.py

#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import collections from test.helper import gettestcases from yt_dlp.extractor import FacebookIE, YoutubeIE, gen_extractors class TestAllURLsMatching(unittest.TestCase): def setUp(self): self.ies = gen_extractors() def matching_ies(self, url): return [ie.IE_NAME for ie in self.ies if ie.suitable(url) and ie.IE_NAME != 'generic'] def assertMatch(self, url, ie_list): self.assertEqual(self.matching_ies(url), ie_list) def test_youtube_playlist_matching(self): assertPlaylist = lambda url: self.assertMatch(url, ['youtube:playlist']) assertTab = lambda url: self.assertMatch(url, ['youtube:tab']) assertPlaylist('ECUl4u3cNGP61MdtwGTqZA0MreSaDybji8') assertPlaylist('UUBABnxM4Ar9ten8Mdjj1j0Q') # 585 assertPlaylist('PL63F0C78739B09958') assertTab('https://www.youtube.com/AsapSCIENCE') assertTab('https://www.youtube.com/embedded') assertTab('https://www.youtube.com/playlist?list=UUBABnxM4Ar9ten8Mdjj1j0Q') assertTab('https://www.youtube.com/playlist?list=PLwP_SiAcdui0KVebT0mU9Apz359a4ubsC') assertTab('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012') # 668 self.assertFalse('youtube:playlist' in self.matching_ies('PLtS2H6bU1M')) # Top tracks assertTab('https://www.youtube.com/playlist?list=MCUS.20142101') def test_youtube_matching(self): self.assertTrue(YoutubeIE.suitable('PLtS2H6bU1M')) self.assertFalse(YoutubeIE.suitable('https://www.youtube.com/watch?v=AV6J6_AeFEQ&playnext=1&list=PL4023E734DA416012')) # 668 self.assertMatch('http://youtu.be/BaW_jenozKc', ['youtube']) # self.assertMatch('http://www.youtube.com/v/BaW_jenozKc', ['youtube']) # /v/ is no longer valid self.assertMatch('https://youtube.googleapis.com/v/BaW_jenozKc', ['youtube']) self.assertMatch('http://www.cleanvideosearch.com/media/action/yt/watch?videoId=8v_4O44sfjM', ['youtube']) def test_youtube_channel_matching(self): assertChannel = lambda url: self.assertMatch(url, ['youtube:tab']) assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM') assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM?feature=gb_ch_rec') assertChannel('https://www.youtube.com/channel/HCtnHdj3df7iM/videos') def test_youtube_user_matching(self): self.assertMatch('http://www.youtube.com/NASAgovVideo/videos', ['youtube:tab']) def test_youtube_feeds(self): self.assertMatch('https://www.youtube.com/feed/library', ['youtube:tab']) self.assertMatch('https://www.youtube.com/feed/history', ['youtube:tab']) self.assertMatch('https://www.youtube.com/feed/watch_later', ['youtube:tab']) self.assertMatch('https://www.youtube.com/feed/subscriptions', ['youtube:tab']) def test_youtube_search_matching(self): self.assertMatch('http://www.youtube.com/results?search_query=making+mustard', ['youtube:search_url'])
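# A minimal sketch of what assertMatch exercises (illustrative, not executed):
# each extractor exposes a suitable() classmethod backed by its _VALID_URL
# regex, e.g.
#     from yt_dlp.extractor.youtube import YoutubeIE
#     YoutubeIE.suitable('https://www.youtube.com/watch?v=BaW_jenozKc')  # True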
self.assertMatch('https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', ['youtube:search_url']) def test_facebook_matching(self): self.assertTrue(FacebookIE.suitable('https://www.facebook.com/Shiniknoh#!/photo.php?v=10153317450565268')) self.assertTrue(FacebookIE.suitable('https://www.facebook.com/cindyweather?fref=ts#!/photo.php?v=10152183998945793')) def test_no_duplicates(self): ies = gen_extractors() for tc in gettestcases(include_onlymatching=True): url = tc['url'] for ie in ies: if type(ie).__name__ in ('GenericIE', tc['name'] + 'IE'): self.assertTrue(ie.suitable(url), f'{type(ie).__name__} should match URL {url!r}') else: self.assertFalse( ie.suitable(url), f'{type(ie).__name__} should not match URL {url!r} . That URL belongs to {tc["name"]}.') def test_keywords(self): self.assertMatch(':ytsubs', ['youtube:subscriptions']) self.assertMatch(':ytsubscriptions', ['youtube:subscriptions']) self.assertMatch(':ythistory', ['youtube:history']) def test_vimeo_matching(self): self.assertMatch('https://vimeo.com/channels/tributes', ['vimeo:channel']) self.assertMatch('https://vimeo.com/channels/31259', ['vimeo:channel']) self.assertMatch('https://vimeo.com/channels/31259/53576664', ['vimeo']) self.assertMatch('https://vimeo.com/user7108434', ['vimeo:user']) self.assertMatch('https://vimeo.com/user7108434/videos', ['vimeo:user']) self.assertMatch('https://vimeo.com/user21297594/review/75524534/3c257a1b5d', ['vimeo:review']) # https://github.com/ytdl-org/youtube-dl/issues/1930 def test_soundcloud_not_matching_sets(self): self.assertMatch('http://soundcloud.com/floex/sets/gone-ep', ['soundcloud:set']) def test_tumblr(self): self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430/orphan-black-dvd-extra-behind-the-scenes', ['Tumblr']) self.assertMatch('http://tatianamaslanydaily.tumblr.com/post/54196191430', ['Tumblr']) def test_pbs(self): # https://github.com/ytdl-org/youtube-dl/issues/2350 self.assertMatch('http://video.pbs.org/viralplayer/2365173446/', ['pbs']) self.assertMatch('http://video.pbs.org/widget/partnerplayer/980042464/', ['pbs']) def test_no_duplicated_ie_names(self): name_accu = collections.defaultdict(list) for ie in self.ies: name_accu[ie.IE_NAME.lower()].append(type(ie).__name__) for (ie_name, ie_list) in name_accu.items(): self.assertEqual( len(ie_list), 1, f'Multiple extractors with the same IE_NAME "{ie_name}" ({", ".join(ie_list)})') if __name__ == '__main__': unittest.main()

yt-dlp-2024.09.27/test/test_cache.py

#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import shutil from test.helper import FakeYDL from yt_dlp.cache import Cache def _is_empty(d): return not bool(os.listdir(d)) def _mkdir(d): if not os.path.exists(d): os.mkdir(d) class TestCache(unittest.TestCase): def setUp(self): TEST_DIR = os.path.dirname(os.path.abspath(__file__)) TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata')
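# Cache API sketch (the section/key names are made up for illustration):
#     c = Cache(ydl)
#     c.store('some-extractor', 'token', {'value': 1})
#     c.load('some-extractor', 'token')  # -> {'value': 1}; None on a miss
# Entries are persisted as JSON files under params['cachedir'].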
# File: yt-dlp-2024.09.27/test/test_compat.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import struct

from yt_dlp import compat
from yt_dlp.compat import urllib  # isort: split
from yt_dlp.compat import (
    compat_etree_fromstring,
    compat_expanduser,
    compat_urllib_parse_unquote,  # noqa: TID251
    compat_urllib_parse_urlencode,  # noqa: TID251
)
from yt_dlp.compat.urllib.request import getproxies


class TestCompat(unittest.TestCase):
    def test_compat_passthrough(self):
        with self.assertWarns(DeprecationWarning):
            _ = compat.compat_basestring

        with self.assertWarns(DeprecationWarning):
            _ = compat.WINDOWS_VT_MODE

        self.assertEqual(urllib.request.getproxies, getproxies)

        with self.assertWarns(DeprecationWarning):
            _ = compat.compat_pycrypto_AES  # Must not raise error

    def test_compat_expanduser(self):
        old_home = os.environ.get('HOME')
        test_str = R'C:\Documents and Settings\тест\Application Data'
        try:
            os.environ['HOME'] = test_str
            self.assertEqual(compat_expanduser('~'), test_str)
        finally:
            os.environ['HOME'] = old_home or ''

    def test_compat_urllib_parse_unquote(self):
        self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
        self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'), '~/abc+def')
        self.assertEqual(compat_urllib_parse_unquote(''), '')
        self.assertEqual(compat_urllib_parse_unquote('%'), '%')
        self.assertEqual(compat_urllib_parse_unquote('%%'), '%%')
        self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%')
        self.assertEqual(compat_urllib_parse_unquote('%2F'), '/')
        self.assertEqual(compat_urllib_parse_unquote('%2f'), '/')
        self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'), '津波')
        self.assertEqual(
            compat_urllib_parse_unquote('''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''),
            '''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''')
        self.assertEqual(
            compat_urllib_parse_unquote('''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%87%80 %E2%86%B6%I%Break%25Things%'''),
            '''(^◣_◢^)っ︻デ═一 ⇀ ⇀ ⇀ ⇀ ⇀ ↶%I%Break%Things%''')

    def test_compat_urllib_parse_unquote_plus(self):
        self.assertEqual(urllib.parse.unquote_plus('abc%20def'), 'abc def')
        self.assertEqual(urllib.parse.unquote_plus('%7e/abc+def'), '~/abc def')

    def test_compat_urllib_parse_urlencode(self):
        self.assertEqual(compat_urllib_parse_urlencode({'abc': 'def'}), 'abc=def')
        self.assertEqual(compat_urllib_parse_urlencode({'abc': b'def'}), 'abc=def')
        self.assertEqual(compat_urllib_parse_urlencode({b'abc': 'def'}), 'abc=def')
        self.assertEqual(compat_urllib_parse_urlencode({b'abc': b'def'}), 'abc=def')
        self.assertEqual(compat_urllib_parse_urlencode([('abc', 'def')]), 'abc=def')
        self.assertEqual(compat_urllib_parse_urlencode([('abc', b'def')]), 'abc=def')
        self.assertEqual(compat_urllib_parse_urlencode([(b'abc', 'def')]), 'abc=def')
        self.assertEqual(compat_urllib_parse_urlencode([(b'abc', b'def')]), 'abc=def')

    def test_compat_etree_fromstring(self):
        xml = '''
            <root foo="bar" spam="中文">
                <normal>foo</normal>
                <chinese>中文</chinese>
                <foo><bar>spam</bar></foo>
            </root>
        '''
        doc = compat_etree_fromstring(xml.encode())
        self.assertTrue(isinstance(doc.attrib['foo'], str))
        self.assertTrue(isinstance(doc.attrib['spam'], str))
        self.assertTrue(isinstance(doc.find('normal').text, str))
        self.assertTrue(isinstance(doc.find('chinese').text, str))
        self.assertTrue(isinstance(doc.find('foo/bar').text, str))

    def test_compat_etree_fromstring_doctype(self):
        xml = '''<?xml version="1.0"?>
<!DOCTYPE smil PUBLIC "-//W3C//DTD SMIL 2.0//EN" "http://www.w3.org/2001/SMIL20/SMIL20.dtd">
<smil xmlns="http://www.w3.org/2001/SMIL20/Language"></smil>'''
        compat_etree_fromstring(xml)

    def test_struct_unpack(self):
        self.assertEqual(struct.unpack('!B', b'\x00'), (0,))


if __name__ == '__main__':
    unittest.main()
# File: yt-dlp-2024.09.27/test/test_config.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest
import unittest.mock

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import contextlib
import itertools
from pathlib import Path

from yt_dlp.compat import compat_expanduser
from yt_dlp.options import create_parser, parseOpts
from yt_dlp.utils import Config, get_executable_path

ENVIRON_DEFAULTS = {
    'HOME': None,
    'XDG_CONFIG_HOME': '/_xdg_config_home/',
    'USERPROFILE': 'C:/Users/testing/',
    'APPDATA': 'C:/Users/testing/AppData/Roaming/',
    'HOMEDRIVE': 'C:/',
    'HOMEPATH': 'Users/testing/',
}


@contextlib.contextmanager
def set_environ(**kwargs):
    saved_environ = os.environ.copy()
    for name, value in {**ENVIRON_DEFAULTS, **kwargs}.items():
        if value is None:
            os.environ.pop(name, None)
        else:
            os.environ[name] = value
    yield
    os.environ.clear()
    os.environ.update(saved_environ)


def _generate_expected_groups():
    xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
    appdata_dir = os.getenv('appdata')
    home_dir = compat_expanduser('~')
    return {
        'Portable': [
            Path(get_executable_path(), 'yt-dlp.conf'),
        ],
        'Home': [
            Path('yt-dlp.conf'),
        ],
        'User': [
            Path(xdg_config_home, 'yt-dlp.conf'),
            Path(xdg_config_home, 'yt-dlp', 'config'),
            Path(xdg_config_home, 'yt-dlp', 'config.txt'),
            *((
                Path(appdata_dir, 'yt-dlp.conf'),
                Path(appdata_dir, 'yt-dlp', 'config'),
                Path(appdata_dir, 'yt-dlp', 'config.txt'),
            ) if appdata_dir else ()),
            Path(home_dir, 'yt-dlp.conf'),
            Path(home_dir, 'yt-dlp.conf.txt'),
            Path(home_dir, '.yt-dlp', 'config'),
            Path(home_dir, '.yt-dlp', 'config.txt'),
        ],
        'System': [
            Path('/etc/yt-dlp.conf'),
            Path('/etc/yt-dlp/config'),
            Path('/etc/yt-dlp/config.txt'),
        ],
    }


class TestConfig(unittest.TestCase):
    maxDiff = None

    @set_environ()
    def test_config__ENVIRON_DEFAULTS_sanity(self):
        expected = make_expected()
        self.assertCountEqual(
            set(expected), expected,
            'ENVIRON_DEFAULTS produces non unique names')

    def test_config_all_environ_values(self):
        for name, value in ENVIRON_DEFAULTS.items():
            for new_value in (None, '', '.', value or '/some/dir'):
                with set_environ(**{name: new_value}):
                    self._simple_grouping_test()

    def test_config_default_expected_locations(self):
        files, _ = self._simple_config_test()
        self.assertEqual(
            files, make_expected(),
            'Not all expected locations have been checked')

    def test_config_default_grouping(self):
        self._simple_grouping_test()

    def _simple_grouping_test(self):
        expected_groups = make_expected_groups()
        for name, group in expected_groups.items():
            for index, existing_path in enumerate(group):
                result, opts = self._simple_config_test(existing_path)
                expected = expected_from_expected_groups(expected_groups, existing_path)
                self.assertEqual(
                    result, expected,
                    f'The checked locations do not match the expected ({name}, {index})')
                self.assertEqual(
                    opts.outtmpl['default'], '1',
                    f'The used result value was incorrect ({name}, {index})')

    def _simple_config_test(self, *stop_paths):
        encountered = 0
        paths = []

        def read_file(filename, default=[]):
            nonlocal encountered
            path = Path(filename)
            paths.append(path)
            if path in stop_paths:
                encountered += 1
                return ['-o', f'{encountered}']

        with ConfigMock(read_file):
            _, opts, _ = parseOpts([], False)

        return paths, opts

    @set_environ()
    def test_config_early_exit_commandline(self):
        self._early_exit_test(0, '--ignore-config')

    @set_environ()
    def test_config_early_exit_files(self):
        for index, _ in enumerate(make_expected(), 1):
            self._early_exit_test(index)

    def _early_exit_test(self, allowed_reads, *args):
        reads = 0

        def read_file(filename, default=[]):
            nonlocal reads
            reads += 1
            if reads > allowed_reads:
                self.fail('The remaining config was not ignored')
            elif reads == allowed_reads:
                return ['--ignore-config']

        with ConfigMock(read_file):
            parseOpts(args, False)

    @set_environ()
    def test_config_override_commandline(self):
        self._override_test(0, '-o', 'pass')

    @set_environ()
    def test_config_override_files(self):
        for index, _ in enumerate(make_expected(), 1):
            self._override_test(index)

    def _override_test(self, start_index, *args):
        index = 0

        def read_file(filename, default=[]):
            nonlocal index
            index += 1
            if index > start_index:
                return ['-o', 'fail']
            elif index == start_index:
                return ['-o', 'pass']

        with ConfigMock(read_file):
            _, opts, _ = parseOpts(args, False)

        self.assertEqual(
            opts.outtmpl['default'], 'pass',
            'The earlier group did not override the later ones')


@contextlib.contextmanager
def ConfigMock(read_file=None):
    with unittest.mock.patch('yt_dlp.options.Config') as mock:
        mock.return_value = Config(create_parser())
        if read_file is not None:
            mock.read_file = read_file

        yield mock


def make_expected(*filepaths):
    return expected_from_expected_groups(_generate_expected_groups(), *filepaths)


def make_expected_groups(*filepaths):
    return _filter_expected_groups(_generate_expected_groups(), filepaths)


def expected_from_expected_groups(expected_groups, *filepaths):
    return list(itertools.chain.from_iterable(
        _filter_expected_groups(expected_groups, filepaths).values()))


def _filter_expected_groups(expected, filepaths):
    if not filepaths:
        return expected

    result = {}
    for group, paths in expected.items():
        new_paths = []
        for path in paths:
            new_paths.append(path)
            if path in filepaths:
                break

        result[group] = new_paths

    return result


if __name__ == '__main__':
    unittest.main()
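# A sketch of the public entry point these tests drive through ConfigMock:
# parseOpts() reads the config groups in the Portable > Home > User > System
# order asserted above and returns (parser, options, leftover args). This
# assumes no conflicting yt-dlp config exists in the surrounding environment.
from yt_dlp.options import parseOpts

parser, opts, args = parseOpts(['-o', '%(title)s.%(ext)s'], False)
print(opts.outtmpl['default'])  # the command-line value; it overrides any config file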
# File: yt-dlp-2024.09.27/test/test_cookies.py

import datetime as dt
import unittest

from yt_dlp import cookies
from yt_dlp.cookies import (
    LenientSimpleCookie,
    LinuxChromeCookieDecryptor,
    MacChromeCookieDecryptor,
    WindowsChromeCookieDecryptor,
    _get_linux_desktop_environment,
    _LinuxDesktopEnvironment,
    parse_safari_cookies,
    pbkdf2_sha1,
)


class Logger:
    def debug(self, message, *args, **kwargs):
        print(f'[verbose] {message}')

    def info(self, message, *args, **kwargs):
        print(message)

    def warning(self, message, *args, **kwargs):
        self.error(message)

    def error(self, message, *args, **kwargs):
        raise Exception(message)


class MonkeyPatch:
    def __init__(self, module, temporary_values):
        self._module = module
        self._temporary_values = temporary_values
        self._backup_values = {}

    def __enter__(self):
        for name, temp_value in self._temporary_values.items():
            self._backup_values[name] = getattr(self._module, name)
            setattr(self._module, name, temp_value)

    def __exit__(self, exc_type, exc_val, exc_tb):
        for name, backup_value in self._backup_values.items():
            setattr(self._module, name, backup_value)


class TestCookies(unittest.TestCase):
    def test_get_desktop_environment(self):
        """ based on https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util_unittest.cc """
        test_cases = [
            ({}, _LinuxDesktopEnvironment.OTHER),
            ({'DESKTOP_SESSION': 'my_custom_de'}, _LinuxDesktopEnvironment.OTHER),
            ({'XDG_CURRENT_DESKTOP': 'my_custom_de'}, _LinuxDesktopEnvironment.OTHER),
            ({'DESKTOP_SESSION': 'gnome'}, _LinuxDesktopEnvironment.GNOME),
            ({'DESKTOP_SESSION': 'mate'}, _LinuxDesktopEnvironment.GNOME),
            ({'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4),
            ({'DESKTOP_SESSION': 'kde'}, _LinuxDesktopEnvironment.KDE3),
            ({'DESKTOP_SESSION': 'xfce'}, _LinuxDesktopEnvironment.XFCE),
            ({'GNOME_DESKTOP_SESSION_ID': 1}, _LinuxDesktopEnvironment.GNOME),
            ({'KDE_FULL_SESSION': 1}, _LinuxDesktopEnvironment.KDE3),
            ({'KDE_FULL_SESSION': 1, 'DESKTOP_SESSION': 'kde4'}, _LinuxDesktopEnvironment.KDE4),
            ({'XDG_CURRENT_DESKTOP': 'X-Cinnamon'}, _LinuxDesktopEnvironment.CINNAMON),
            ({'XDG_CURRENT_DESKTOP': 'Deepin'}, _LinuxDesktopEnvironment.DEEPIN),
            ({'XDG_CURRENT_DESKTOP': 'GNOME'}, _LinuxDesktopEnvironment.GNOME),
            ({'XDG_CURRENT_DESKTOP': 'GNOME:GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME),
            ({'XDG_CURRENT_DESKTOP': 'GNOME : GNOME-Classic'}, _LinuxDesktopEnvironment.GNOME),
            ({'XDG_CURRENT_DESKTOP': 'ubuntu:GNOME'}, _LinuxDesktopEnvironment.GNOME),
            ({'XDG_CURRENT_DESKTOP': 'Unity', 'DESKTOP_SESSION': 'gnome-fallback'}, _LinuxDesktopEnvironment.GNOME),
            ({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '5'}, _LinuxDesktopEnvironment.KDE5),
            ({'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '6'}, _LinuxDesktopEnvironment.KDE6),
            ({'XDG_CURRENT_DESKTOP': 'KDE'}, _LinuxDesktopEnvironment.KDE4),
            ({'XDG_CURRENT_DESKTOP': 'Pantheon'}, _LinuxDesktopEnvironment.PANTHEON),
            ({'XDG_CURRENT_DESKTOP': 'UKUI'}, _LinuxDesktopEnvironment.UKUI),
            ({'XDG_CURRENT_DESKTOP': 'Unity'}, _LinuxDesktopEnvironment.UNITY),
            ({'XDG_CURRENT_DESKTOP': 'Unity:Unity7'}, _LinuxDesktopEnvironment.UNITY),
            ({'XDG_CURRENT_DESKTOP': 'Unity:Unity8'}, _LinuxDesktopEnvironment.UNITY),
        ]

        for env, expected_desktop_environment in test_cases:
            self.assertEqual(_get_linux_desktop_environment(env, Logger()), expected_desktop_environment)

    def test_chrome_cookie_decryptor_linux_derive_key(self):
        key = LinuxChromeCookieDecryptor.derive_key(b'abc')
        self.assertEqual(key, b'7\xa1\xec\xd4m\xfcA\xc7\xb19Z\xd0\x19\xdcM\x17')

    def test_chrome_cookie_decryptor_mac_derive_key(self):
        key = MacChromeCookieDecryptor.derive_key(b'abc')
        self.assertEqual(key, b'Y\xe2\xc0\xd0P\xf6\xf4\xe1l\xc1\x8cQ\xcb|\xcdY')

    def test_chrome_cookie_decryptor_linux_v10(self):
        with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}):
            encrypted_value = b'v10\xccW%\xcd\xe6\xe6\x9fM" \xa7\xb0\xca\xe4\x07\xd6'
            value = 'USD'
            decryptor = LinuxChromeCookieDecryptor('Chrome', Logger())
            self.assertEqual(decryptor.decrypt(encrypted_value), value)

    def test_chrome_cookie_decryptor_linux_v11(self):
        with MonkeyPatch(cookies, {'_get_linux_keyring_password': lambda *args, **kwargs: b''}):
            encrypted_value = b'v11#\x81\x10>`w\x8f)\xc0\xb2\xc1\r\xf4\x1al\xdd\x93\xfd\xf8\xf8N\xf2\xa9\x83\xf1\xe9o\x0elVQd'
            value = 'tz=Europe.London'
            decryptor = LinuxChromeCookieDecryptor('Chrome', Logger())
            self.assertEqual(decryptor.decrypt(encrypted_value), value)

    def test_chrome_cookie_decryptor_windows_v10(self):
        with MonkeyPatch(cookies, {
            '_get_windows_v10_key': lambda *args, **kwargs: b'Y\xef\xad\xad\xeerp\xf0Y\xe6\x9b\x12\xc2<z\x16]\n\xbb\xb8\xcb\xd7\x9bA\xc3\x14e\x99{\xd6\xf4&',
        }):
            encrypted_value = b'v10T\xb8\xf3\xb8\x01\xa7TtcV\xfc\x88\xb8\xb8\xef\x05\xb5\xfd\x18\xc90\x009\xab\xb1\x893\x85)\x87\xe1\xa9-\xa3\xad='
            value = '32101439'
            decryptor = WindowsChromeCookieDecryptor('', Logger())
            self.assertEqual(decryptor.decrypt(encrypted_value), value)

    def test_chrome_cookie_decryptor_mac_v10(self):
        with MonkeyPatch(cookies, {'_get_mac_keyring_password': lambda *args, **kwargs: b'6eIDUdtKAacvlHwBVwvg/Q=='}):
            encrypted_value = b'v10\xb3\xbe\xad\xa1[\x9fC\xa1\x98\xe0\x9a\x01\xd9\xcf\xbfc'
            value = '2021-06-01-22'
            decryptor = MacChromeCookieDecryptor('', Logger())
            self.assertEqual(decryptor.decrypt(encrypted_value), value)

    def test_safari_cookie_parsing(self):
        cookies = (
            b'cook\x00\x00\x00\x01\x00\x00\x00i\x00\x00\x01\x00\x01\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00Y'
            b'\x00\x00\x00\x00\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x008\x00\x00\x00B\x00\x00\x00F\x00\x00\x00H'
            b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x03\xa5>\xc3A\x00\x00\x80\xc3\x07:\xc3A'
            b'localhost\x00foo\x00/\x00test%20%3Bcookie\x00\x00\x00\x054\x07\x17 \x05\x00\x00\x00Kbplist00\xd1\x01'
            b'\x02_\x10\x18NSHTTPCookieAcceptPolicy\x10\x02\x08\x0b&\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00\x00'
            b'\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00(')

        jar = parse_safari_cookies(cookies)
        self.assertEqual(len(jar), 1)
        cookie = next(iter(jar))
        self.assertEqual(cookie.domain, 'localhost')
        self.assertEqual(cookie.port, None)
        self.assertEqual(cookie.path, '/')
        self.assertEqual(cookie.name, 'foo')
        self.assertEqual(cookie.value, 'test%20%3Bcookie')
        self.assertFalse(cookie.secure)
        expected_expiration = dt.datetime(2021, 6, 18, 21, 39, 19, tzinfo=dt.timezone.utc)
        self.assertEqual(cookie.expires, int(expected_expiration.timestamp()))

    def test_pbkdf2_sha1(self):
        key = pbkdf2_sha1(b'peanuts', b' ' * 16, 1, 16)
        self.assertEqual(key, b'g\xe1\x8e\x0fQ\x1c\x9b\xf3\xc9`!\xaa\x90\xd9\xd34')


class TestLenientSimpleCookie(unittest.TestCase):
    def _run_tests(self, *cases):
        for message, raw_cookie, expected in cases:
            cookie = LenientSimpleCookie(raw_cookie)

            with self.subTest(message, expected=expected):
                self.assertEqual(cookie.keys(), expected.keys(), message)

                for key, expected_value in expected.items():
                    morsel = cookie[key]
                    if isinstance(expected_value, tuple):
                        expected_value, expected_attributes = expected_value
                    else:
                        expected_attributes = {}

                    attributes = {
                        key: value
                        for key, value in dict(morsel).items()
                        if value != ''
                    }
                    self.assertEqual(attributes, expected_attributes, message)

                    self.assertEqual(morsel.value, expected_value, message)

    def test_parsing(self):
        self._run_tests(
            # Copied from https://github.com/python/cpython/blob/v3.10.7/Lib/test/test_http_cookies.py
            (
                'Test basic cookie',
                'chips=ahoy; vienna=finger',
                {'chips': 'ahoy', 'vienna': 'finger'},
            ),
            (
                'Test quoted cookie',
                'keebler="E=mc2; L=\\"Loves\\"; fudge=\\012;"',
                {'keebler': 'E=mc2; L="Loves"; fudge=\012;'},
            ),
            (
                "Allow '=' in an unquoted value",
                'keebler=E=mc2',
                {'keebler': 'E=mc2'},
            ),
            (
                "Allow cookies with ':' in their name",
                'key:term=value:term',
                {'key:term': 'value:term'},
            ),
            (
                "Allow '[' and ']' in cookie values",
                'a=b; c=[; d=r; f=h',
                {'a': 'b', 'c': '[', 'd': 'r', 'f': 'h'},
            ),
            (
                'Test basic cookie attributes',
                'Customer="WILE_E_COYOTE"; Version=1; Path=/acme',
                {'Customer': ('WILE_E_COYOTE', {'version': '1', 'path': '/acme'})},
            ),
            (
                'Test flag only cookie attributes',
                'Customer="WILE_E_COYOTE"; HttpOnly; Secure',
                {'Customer': ('WILE_E_COYOTE', {'httponly': True, 'secure': True})},
            ),
            (
                'Test flag only attribute with values',
                'eggs=scrambled; httponly=foo; secure=bar; Path=/bacon',
                {'eggs': ('scrambled', {'httponly': 'foo', 'secure': 'bar', 'path': '/bacon'})},
            ),
            (
                "Test special case for 'expires' attribute, 4 digit year",
                'Customer="W"; expires=Wed, 01 Jan 2010 00:00:00 GMT',
                {'Customer': ('W', {'expires': 'Wed, 01 Jan 2010 00:00:00 GMT'})},
            ),
            (
                "Test special case for 'expires' attribute, 2 digit year",
                'Customer="W"; expires=Wed, 01 Jan 98 00:00:00 GMT',
                {'Customer': ('W', {'expires': 'Wed, 01 Jan 98 00:00:00 GMT'})},
            ),
            (
                'Test extra spaces in keys and values',
                'eggs  =  scrambled  ;  secure  ;  path  =  bar   ; foo=foo   ',
                {'eggs': ('scrambled', {'secure': True, 'path': 'bar'}), 'foo': 'foo'},
            ),
            (
                'Test quoted attributes',
                'Customer="WILE_E_COYOTE"; Version="1"; Path="/acme"',
                {'Customer': ('WILE_E_COYOTE', {'version': '1', 'path': '/acme'})},
            ),
            # Our own tests that CPython passes
            (
                "Allow ';' in quoted value",
                'chips="a;hoy"; vienna=finger',
                {'chips': 'a;hoy', 'vienna': 'finger'},
            ),
            (
                'Keep only the last set value',
                'a=c; a=b',
                {'a': 'b'},
            ),
        )

    def test_lenient_parsing(self):
        self._run_tests(
            (
                'Ignore and try to skip invalid cookies',
                'chips={"ahoy;": 1}; vienna="finger;"',
                {'vienna': 'finger;'},
            ),
            (
                'Ignore cookies without a name',
                'a=b; unnamed; c=d',
                {'a': 'b', 'c': 'd'},
            ),
            (
                "Ignore '\"' cookie without name",
                'a=b; "; c=d',
                {'a': 'b', 'c': 'd'},
            ),
            (
                'Skip all space separated values',
                'x a=b c=d x; e=f',
                {'a': 'b', 'c': 'd', 'e': 'f'},
            ),
            (
                'Skip all space separated values',
                'x a=b; data={"complex": "json", "with": "key=value"}; x c=d x',
                {'a': 'b', 'c': 'd'},
            ),
            (
                'Expect quote mending',
                'a=b; invalid="; c=d',
                {'a': 'b', 'c': 'd'},
            ),
            (
                'Reset morsel after invalid to not capture attributes',
                'a=b; invalid; Version=1; c=d',
                {'a': 'b', 'c': 'd'},
            ),
            (
                'Reset morsel after invalid to not capture attributes',
                'a=b; $invalid; $Version=1; c=d',
                {'a': 'b', 'c': 'd'},
            ),
            (
                'Continue after non-flag attribute without value',
                'a=b; path; Version=1; c=d',
                {'a': 'b', 'c': 'd'},
            ),
            (
                'Allow cookie attributes with `$` prefix',
                'Customer="WILE_E_COYOTE"; $Version=1; $Secure; $Path=/acme',
                {'Customer': ('WILE_E_COYOTE', {'version': '1', 'secure': True, 'path': '/acme'})},
            ),
            (
                'Invalid Morsel keys should not result in an error',
                'Key=Value; [Invalid]=Value; Another=Value',
                {'Key': 'Value', 'Another': 'Value'},
            ),
        )
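# A small sketch of the lenient parser tested above: unlike the stdlib
# SimpleCookie, LenientSimpleCookie skips malformed fragments instead of
# rejecting the whole header.
from yt_dlp.cookies import LenientSimpleCookie

jar = LenientSimpleCookie('a=b; unnamed; c=d')
print({name: morsel.value for name, morsel in jar.items()})  # {'a': 'b', 'c': 'd'}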
# File: yt-dlp-2024.09.27/test/test_download.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import collections
import hashlib
import json

from test.helper import (
    assertGreaterEqual,
    expect_info_dict,
    expect_warnings,
    get_params,
    gettestcases,
    getwebpagetestcases,
    is_download_test,
    try_rm,
)

import yt_dlp.YoutubeDL  # isort: split
from yt_dlp.extractor import get_info_extractor
from yt_dlp.networking.exceptions import HTTPError, TransportError
from yt_dlp.utils import (
    DownloadError,
    ExtractorError,
    UnavailableVideoError,
    YoutubeDLError,
    format_bytes,
    join_nonempty,
)

RETRIES = 3


class YoutubeDL(yt_dlp.YoutubeDL):
    def __init__(self, *args, **kwargs):
        self.to_stderr = self.to_screen
        self.processed_info_dicts = []
        super().__init__(*args, **kwargs)

    def report_warning(self, message, *args, **kwargs):
        # Don't accept warnings during tests
        raise ExtractorError(message)

    def process_info(self, info_dict):
        self.processed_info_dicts.append(info_dict.copy())
        return super().process_info(info_dict)


def _file_md5(fn):
    with open(fn, 'rb') as f:
        return hashlib.md5(f.read()).hexdigest()


normal_test_cases = gettestcases()
webpage_test_cases = getwebpagetestcases()
tests_counter = collections.defaultdict(collections.Counter)


@is_download_test
class TestDownload(unittest.TestCase):
    # Parallel testing in nosetests. See
    # http://nose.readthedocs.org/en/latest/doc_tests/test_multiprocess/multiprocess.html
    _multiprocess_shared_ = True
    maxDiff = None

    COMPLETED_TESTS = {}

    def __str__(self):
        """Identify each test with the `add_ie` attribute, if available."""
        cls, add_ie = type(self), getattr(self, self._testMethodName).add_ie
        return f'{self._testMethodName} ({cls.__module__}.{cls.__name__}){f" [{add_ie}]" if add_ie else ""}:'


# Dynamically generate tests
def generator(test_case, tname):
    def test_template(self):
        if self.COMPLETED_TESTS.get(tname):
            return
        self.COMPLETED_TESTS[tname] = True
        ie = yt_dlp.extractor.get_info_extractor(test_case['name'])()
        other_ies = [get_info_extractor(ie_key)() for ie_key in test_case.get('add_ie', [])]
        is_playlist = any(k.startswith('playlist') for k in test_case)
        test_cases = test_case.get(
            'playlist', [] if is_playlist else [test_case])

        def print_skipping(reason):
            print('Skipping {}: {}'.format(test_case['name'], reason))
            self.skipTest(reason)

        if not ie.working():
            print_skipping('IE marked as not _WORKING')

        for tc in test_cases:
            if tc.get('expected_exception'):
                continue
            info_dict = tc.get('info_dict', {})
            params = tc.get('params', {})
            if not info_dict.get('id'):
                raise Exception(f'Test {tname} definition incorrect - "id" key is not present')
            elif not info_dict.get('ext') and info_dict.get('_type', 'video') == 'video':
                if params.get('skip_download') and params.get('ignore_no_formats_error'):
                    continue
                raise Exception(f'Test {tname} definition incorrect - "ext" key must be present to define the output file')

        if 'skip' in test_case:
            print_skipping(test_case['skip'])

        for other_ie in other_ies:
            if not other_ie.working():
                print_skipping(f'test depends on {other_ie.ie_key()}IE, marked as not WORKING')

        params = get_params(test_case.get('params', {}))
        params['outtmpl'] = tname + '_' + params['outtmpl']
        if is_playlist and 'playlist' not in test_case:
            params.setdefault('extract_flat', 'in_playlist')
            params.setdefault('playlistend', test_case.get(
                'playlist_mincount', test_case.get('playlist_count', -2) + 1))
            params.setdefault('skip_download', True)

        ydl = YoutubeDL(params, auto_init=False)
        ydl.add_default_info_extractors()
        finished_hook_called = set()

        def _hook(status):
            if status['status'] == 'finished':
                finished_hook_called.add(status['filename'])
        ydl.add_progress_hook(_hook)
        expect_warnings(ydl, test_case.get('expected_warnings', []))

        def get_tc_filename(tc):
            return ydl.prepare_filename(dict(tc.get('info_dict', {})))

        res_dict = None

        def match_exception(err):
            expected_exception = test_case.get('expected_exception')
            if not expected_exception:
                return False
            if err.__class__.__name__ == expected_exception:
                return True
            return any(exc.__class__.__name__ == expected_exception for exc in err.exc_info)

        def try_rm_tcs_files(tcs=None):
            if tcs is None:
                tcs = test_cases
            for tc in tcs:
                tc_filename = get_tc_filename(tc)
                try_rm(tc_filename)
                try_rm(tc_filename + '.part')
                try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
        try_rm_tcs_files()
        try:
            try_num = 1
            while True:
                try:
                    # We're not using .download here since that is just a shim
                    # for outside error handling, and returns the exit code
                    # instead of the result dict.
                    res_dict = ydl.extract_info(
                        test_case['url'],
                        force_generic_extractor=params.get('force_generic_extractor', False))
                except (DownloadError, ExtractorError) as err:
                    # Check if the exception is not a network related one
                    if not isinstance(err.exc_info[1], (TransportError, UnavailableVideoError)) or (isinstance(err.exc_info[1], HTTPError) and err.exc_info[1].status == 503):
                        if match_exception(err):
                            return
                        err.msg = f'{getattr(err, "msg", err)} ({tname})'
                        raise

                    if try_num == RETRIES:
                        raise

                    print(f'Retrying: {try_num} failed tries\n\n##########\n\n')

                    try_num += 1
                except YoutubeDLError as err:
                    if match_exception(err):
                        return
                    raise
                else:
                    break

            if is_playlist:
                self.assertTrue(res_dict['_type'] in ['playlist', 'multi_video'])
                self.assertTrue('entries' in res_dict)
                expect_info_dict(self, res_dict, test_case.get('info_dict', {}))

            if 'playlist_mincount' in test_case:
                assertGreaterEqual(
                    self,
                    len(res_dict['entries']),
                    test_case['playlist_mincount'],
                    'Expected at least %d in playlist %s, but got only %d' % (
                        test_case['playlist_mincount'], test_case['url'],
                        len(res_dict['entries'])))
            if 'playlist_count' in test_case:
                self.assertEqual(
                    len(res_dict['entries']),
                    test_case['playlist_count'],
                    'Expected %d entries in playlist %s, but got %d.' % (
                        test_case['playlist_count'],
                        test_case['url'],
                        len(res_dict['entries']),
                    ))
            if 'playlist_duration_sum' in test_case:
                got_duration = sum(e['duration'] for e in res_dict['entries'])
                self.assertEqual(
                    test_case['playlist_duration_sum'], got_duration)

            # Generalize both playlists and single videos to unified format for
            # simplicity
            if 'entries' not in res_dict:
                res_dict['entries'] = [res_dict]

            for tc_num, tc in enumerate(test_cases):
                tc_res_dict = res_dict['entries'][tc_num]
                # First, check test cases' data against extracted data alone
                expect_info_dict(self, tc_res_dict, tc.get('info_dict', {}))
                if tc_res_dict.get('_type', 'video') != 'video':
                    continue
                # Now, check downloaded file consistency
                tc_filename = get_tc_filename(tc)
                if not test_case.get('params', {}).get('skip_download', False):
                    self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
                    self.assertTrue(tc_filename in finished_hook_called)
                    expected_minsize = tc.get('file_minsize', 10000)
                    if expected_minsize is not None:
                        if params.get('test'):
                            expected_minsize = max(expected_minsize, 10000)
                        got_fsize = os.path.getsize(tc_filename)
                        assertGreaterEqual(
                            self, got_fsize, expected_minsize,
                            f'Expected {tc_filename} to be at least {format_bytes(expected_minsize)}, '
                            f'but it\'s only {format_bytes(got_fsize)} ')
                    if 'md5' in tc:
                        md5_for_file = _file_md5(tc_filename)
                        self.assertEqual(tc['md5'], md5_for_file)
                # Finally, check test cases' data again but this time against
                # extracted data from info JSON file written during processing
                info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
                self.assertTrue(
                    os.path.exists(info_json_fn),
                    f'Missing info file {info_json_fn}')
                with open(info_json_fn, encoding='utf-8') as infof:
                    info_dict = json.load(infof)
                expect_info_dict(self, info_dict, tc.get('info_dict', {}))
        finally:
            try_rm_tcs_files()
            if is_playlist and res_dict is not None and res_dict.get('entries'):
                # Remove all other files that may have been extracted if the
                # extractor returns full results even with extract_flat
                res_tcs = [{'info_dict': e} for e in res_dict['entries']]
                try_rm_tcs_files(res_tcs)
            ydl.close()
    return test_template


# And add them to TestDownload
def inject_tests(test_cases, label=''):
    for test_case in test_cases:
        name = test_case['name']
        tname = join_nonempty('test', name, label, tests_counter[name][label], delim='_')
        tests_counter[name][label] += 1

        test_method = generator(test_case, tname)
        test_method.__name__ = tname
        test_method.add_ie = ','.join(test_case.get('add_ie', []))
        setattr(TestDownload, test_method.__name__, test_method)


inject_tests(normal_test_cases)

# TODO: disable redirection to the IE to ensure we are actually testing the webpage extraction
inject_tests(webpage_test_cases, 'webpage')


def batch_generator(name):
    def test_template(self):
        for label, num_tests in tests_counter[name].items():
            for i in range(num_tests):
                test_name = join_nonempty('test', name, label, i, delim='_')
                try:
                    getattr(self, test_name)()
                except unittest.SkipTest:
                    print(f'Skipped {test_name}')

    return test_template


for name in tests_counter:
    test_method = batch_generator(name)
    test_method.__name__ = f'test_{name}_all'
    test_method.add_ie = ''
    setattr(TestDownload, test_method.__name__, test_method)
del test_method


if __name__ == '__main__':
    unittest.main()
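# A sketch of the extraction entry point the generated tests call; it needs
# network access, and the URL below is a hypothetical placeholder to be
# replaced with a real video page.
import yt_dlp

with yt_dlp.YoutubeDL({'skip_download': True, 'quiet': True}) as ydl:
    info = ydl.extract_info('https://some.site/watch/123')  # hypothetical URL
    print(info['id'], info.get('title'))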
# File: yt-dlp-2024.09.27/test/test_downloader_external.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import http.cookiejar

from test.helper import FakeYDL
from yt_dlp.downloader.external import (
    Aria2cFD,
    AxelFD,
    CurlFD,
    FFmpegFD,
    HttpieFD,
    WgetFD,
)

TEST_COOKIE = {
    'version': 0,
    'name': 'test',
    'value': 'ytdlp',
    'port': None,
    'port_specified': False,
    'domain': '.example.com',
    'domain_specified': True,
    'domain_initial_dot': False,
    'path': '/',
    'path_specified': True,
    'secure': False,
    'expires': None,
    'discard': False,
    'comment': None,
    'comment_url': None,
    'rest': {},
}

TEST_INFO = {'url': 'http://www.example.com/'}


class TestHttpieFD(unittest.TestCase):
    def test_make_cmd(self):
        with FakeYDL() as ydl:
            downloader = HttpieFD(ydl, {})
            self.assertEqual(
                downloader._make_cmd('test', TEST_INFO),
                ['http', '--download', '--output', 'test', 'http://www.example.com/'])

            # Test cookie header is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            self.assertEqual(
                downloader._make_cmd('test', TEST_INFO),
                ['http', '--download', '--output', 'test', 'http://www.example.com/', 'Cookie:test=ytdlp'])


class TestAxelFD(unittest.TestCase):
    def test_make_cmd(self):
        with FakeYDL() as ydl:
            downloader = AxelFD(ydl, {})
            self.assertEqual(
                downloader._make_cmd('test', TEST_INFO),
                ['axel', '-o', 'test', '--', 'http://www.example.com/'])

            # Test cookie header is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            self.assertEqual(
                downloader._make_cmd('test', TEST_INFO),
                ['axel', '-o', 'test', '-H', 'Cookie: test=ytdlp', '--max-redirect=0', '--', 'http://www.example.com/'])


class TestWgetFD(unittest.TestCase):
    def test_make_cmd(self):
        with FakeYDL() as ydl:
            downloader = WgetFD(ydl, {})
            self.assertNotIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))
            # Test cookiejar tempfile arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            self.assertIn('--load-cookies', downloader._make_cmd('test', TEST_INFO))


class TestCurlFD(unittest.TestCase):
    def test_make_cmd(self):
        with FakeYDL() as ydl:
            downloader = CurlFD(ydl, {})
            self.assertNotIn('--cookie', downloader._make_cmd('test', TEST_INFO))
            # Test cookie header is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            self.assertIn('--cookie', downloader._make_cmd('test', TEST_INFO))
            self.assertIn('test=ytdlp', downloader._make_cmd('test', TEST_INFO))


class TestAria2cFD(unittest.TestCase):
    def test_make_cmd(self):
        with FakeYDL() as ydl:
            downloader = Aria2cFD(ydl, {})
            downloader._make_cmd('test', TEST_INFO)
            self.assertFalse(hasattr(downloader, '_cookies_tempfile'))

            # Test cookiejar tempfile arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            cmd = downloader._make_cmd('test', TEST_INFO)
            self.assertIn(f'--load-cookies={downloader._cookies_tempfile}', cmd)


@unittest.skipUnless(FFmpegFD.available(), 'ffmpeg not found')
class TestFFmpegFD(unittest.TestCase):
    _args = []

    def _test_cmd(self, args):
        self._args = args

    def test_make_cmd(self):
        with FakeYDL() as ydl:
            downloader = FFmpegFD(ydl, {})
            downloader._debug_cmd = self._test_cmd

            downloader._call_downloader('test', {**TEST_INFO, 'ext': 'mp4'})
            self.assertEqual(self._args, [
                'ffmpeg', '-y', '-hide_banner', '-i', 'http://www.example.com/',
                '-c', 'copy', '-f', 'mp4', 'file:test'])

            # Test cookies arg is added
            ydl.cookiejar.set_cookie(http.cookiejar.Cookie(**TEST_COOKIE))
            downloader._call_downloader('test', {**TEST_INFO, 'ext': 'mp4'})
            self.assertEqual(self._args, [
                'ffmpeg', '-y', '-hide_banner',
                '-cookies', 'test=ytdlp; path=/; domain=.example.com;\r\n',
                '-i', 'http://www.example.com/', '-c', 'copy', '-f', 'mp4', 'file:test'])

            # Test with non-url input (ffmpeg reads from stdin '-' for websockets)
            downloader._call_downloader('test', {'url': 'x', 'ext': 'mp4'})
            self.assertEqual(self._args, [
                'ffmpeg', '-y', '-hide_banner', '-i', 'x', '-c', 'copy', '-f', 'mp4', 'file:test'])


if __name__ == '__main__':
    unittest.main()
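# A sketch mirroring the assertions above: the command line an external
# downloader would execute can be inspected through the same (private)
# _make_cmd helper that the tests use.
from test.helper import FakeYDL
from yt_dlp.downloader.external import CurlFD

with FakeYDL() as ydl:
    fd = CurlFD(ydl, {})
    print(fd._make_cmd('out.mp4', {'url': 'http://www.example.com/'}))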
# File: yt-dlp-2024.09.27/test/test_downloader_http.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import http.server
import re
import threading

from test.helper import http_server_port, try_rm
from yt_dlp import YoutubeDL
from yt_dlp.downloader.http import HttpFD
from yt_dlp.utils import encodeFilename
from yt_dlp.utils._utils import _YDLLogger as FakeLogger

TEST_DIR = os.path.dirname(os.path.abspath(__file__))

TEST_SIZE = 10 * 1024


class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    def log_message(self, format, *args):
        pass

    def send_content_range(self, total=None):
        range_header = self.headers.get('Range')
        start = end = None
        if range_header:
            mobj = re.search(r'^bytes=(\d+)-(\d+)', range_header)
            if mobj:
                start = int(mobj.group(1))
                end = int(mobj.group(2))
        valid_range = start is not None and end is not None
        if valid_range:
            content_range = f'bytes {start}-{end}'
            if total:
                content_range += f'/{total}'
            self.send_header('Content-Range', content_range)
        return (end - start + 1) if valid_range else total

    def serve(self, range=True, content_length=True):
        self.send_response(200)
        self.send_header('Content-Type', 'video/mp4')
        size = TEST_SIZE
        if range:
            size = self.send_content_range(TEST_SIZE)
        if content_length:
            self.send_header('Content-Length', size)
        self.end_headers()
        self.wfile.write(b'#' * size)

    def do_GET(self):
        if self.path == '/regular':
            self.serve()
        elif self.path == '/no-content-length':
            self.serve(content_length=False)
        elif self.path == '/no-range':
            self.serve(range=False)
        elif self.path == '/no-range-no-content-length':
            self.serve(range=False, content_length=False)
        else:
            assert False


class TestHttpFD(unittest.TestCase):
    def setUp(self):
        self.httpd = http.server.HTTPServer(
            ('127.0.0.1', 0), HTTPTestRequestHandler)
        self.port = http_server_port(self.httpd)
        self.server_thread = threading.Thread(target=self.httpd.serve_forever)
        self.server_thread.daemon = True
        self.server_thread.start()

    def download(self, params, ep):
        params['logger'] = FakeLogger()
        ydl = YoutubeDL(params)
        downloader = HttpFD(ydl, params)
        filename = 'testfile.mp4'
        try_rm(encodeFilename(filename))
        self.assertTrue(downloader.real_download(filename, {
            'url': f'http://127.0.0.1:{self.port}/{ep}',
        }), ep)
        self.assertEqual(os.path.getsize(encodeFilename(filename)), TEST_SIZE, ep)
        try_rm(encodeFilename(filename))

    def download_all(self, params):
        for ep in ('regular', 'no-content-length', 'no-range', 'no-range-no-content-length'):
            self.download(params, ep)

    def test_regular(self):
        self.download_all({})

    def test_chunked(self):
        self.download_all({
            'http_chunk_size': 1000,
        })


if __name__ == '__main__':
    unittest.main()
# File: yt-dlp-2024.09.27/test/test_execution.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import contextlib
import subprocess

from yt_dlp.utils import Popen

rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

LAZY_EXTRACTORS = 'yt_dlp/extractor/lazy_extractors.py'


class TestExecution(unittest.TestCase):
    def run_yt_dlp(self, exe=(sys.executable, 'yt_dlp/__main__.py'), opts=('--version', )):
        stdout, stderr, returncode = Popen.run(
            [*exe, '--ignore-config', *opts], cwd=rootDir, text=True,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        print(stderr, file=sys.stderr)
        self.assertEqual(returncode, 0)
        return stdout.strip(), stderr.strip()

    def test_main_exec(self):
        self.run_yt_dlp()

    def test_import(self):
        self.run_yt_dlp(exe=(sys.executable, '-c', 'import yt_dlp'))

    def test_module_exec(self):
        self.run_yt_dlp(exe=(sys.executable, '-m', 'yt_dlp'))

    def test_cmdline_umlauts(self):
        _, stderr = self.run_yt_dlp(opts=('ä', '--version'))
        self.assertFalse(stderr)

    def test_lazy_extractors(self):
        try:
            subprocess.check_call([sys.executable, 'devscripts/make_lazy_extractors.py', LAZY_EXTRACTORS],
                                  cwd=rootDir, stdout=subprocess.DEVNULL)
            self.assertTrue(os.path.exists(LAZY_EXTRACTORS))

            _, stderr = self.run_yt_dlp(opts=('-s', 'test:'))
            # `MIN_RECOMMENDED` emits a deprecated feature warning for deprecated Python versions
            if stderr and stderr.startswith('Deprecated Feature: Support for Python'):
                stderr = ''
            self.assertFalse(stderr)

            subprocess.check_call([sys.executable, 'test/test_all_urls.py'], cwd=rootDir, stdout=subprocess.DEVNULL)
        finally:
            with contextlib.suppress(OSError):
                os.remove(LAZY_EXTRACTORS)


if __name__ == '__main__':
    unittest.main()
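# A sketch of the Popen.run() helper used by run_yt_dlp above: it executes a
# command and returns the captured stdout, stderr and return code in one call.
import subprocess
import sys

from yt_dlp.utils import Popen

stdout, stderr, returncode = Popen.run(
    [sys.executable, '-c', 'print("hello")'],
    text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
print(returncode, stdout.strip())  # 0 hello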
# File: yt-dlp-2024.09.27/test/test_http_proxy.py

import abc
import base64
import contextlib
import functools
import json
import os
import random
import ssl
import threading
from http.server import BaseHTTPRequestHandler
from socketserver import ThreadingTCPServer

import pytest

from test.helper import http_server_port, verify_address_availability
from test.test_networking import TEST_DIR
from test.test_socks import IPv6ThreadingTCPServer
from yt_dlp.dependencies import urllib3
from yt_dlp.networking import Request
from yt_dlp.networking.exceptions import HTTPError, ProxyError, SSLError


class HTTPProxyAuthMixin:
    def proxy_auth_error(self):
        self.send_response(407)
        self.send_header('Proxy-Authenticate', 'Basic realm="test http proxy"')
        self.end_headers()
        return False

    def do_proxy_auth(self, username, password):
        if username is None and password is None:
            return True

        proxy_auth_header = self.headers.get('Proxy-Authorization', None)
        if proxy_auth_header is None:
            return self.proxy_auth_error()

        if not proxy_auth_header.startswith('Basic '):
            return self.proxy_auth_error()

        auth = proxy_auth_header[6:]

        try:
            auth_username, auth_password = base64.b64decode(auth).decode().split(':', 1)
        except Exception:
            return self.proxy_auth_error()

        if auth_username != (username or '') or auth_password != (password or ''):
            return self.proxy_auth_error()
        return True


class HTTPProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
    def __init__(self, *args, proxy_info=None, username=None, password=None, request_handler=None, **kwargs):
        self.username = username
        self.password = password
        self.proxy_info = proxy_info
        super().__init__(*args, **kwargs)

    def do_GET(self):
        if not self.do_proxy_auth(self.username, self.password):
            self.server.close_request(self.request)
            return
        if self.path.endswith('/proxy_info'):
            payload = json.dumps(self.proxy_info or {
                'client_address': self.client_address,
                'connect': False,
                'connect_host': None,
                'connect_port': None,
                'headers': dict(self.headers),
                'path': self.path,
                'proxy': ':'.join(str(y) for y in self.connection.getsockname()),
            })
            self.send_response(200)
            self.send_header('Content-Type', 'application/json; charset=utf-8')
            self.send_header('Content-Length', str(len(payload)))
            self.end_headers()
            self.wfile.write(payload.encode())
        else:
            self.send_response(404)
            self.end_headers()

        self.server.close_request(self.request)


if urllib3:
    import urllib3.util.ssltransport

    class SSLTransport(urllib3.util.ssltransport.SSLTransport):
        """
        Modified version of urllib3 SSLTransport to support server side SSL

        This allows us to chain multiple TLS connections.
        """

        def __init__(self, socket, ssl_context, server_hostname=None, suppress_ragged_eofs=True, server_side=False):
            self.incoming = ssl.MemoryBIO()
            self.outgoing = ssl.MemoryBIO()

            self.suppress_ragged_eofs = suppress_ragged_eofs
            self.socket = socket

            self.sslobj = ssl_context.wrap_bio(
                self.incoming,
                self.outgoing,
                server_hostname=server_hostname,
                server_side=server_side,
            )
            self._ssl_io_loop(self.sslobj.do_handshake)

        @property
        def _io_refs(self):
            return self.socket._io_refs

        @_io_refs.setter
        def _io_refs(self, value):
            self.socket._io_refs = value

        def shutdown(self, *args, **kwargs):
            self.socket.shutdown(*args, **kwargs)
else:
    SSLTransport = None


class HTTPSProxyHandler(HTTPProxyHandler):
    def __init__(self, request, *args, **kwargs):
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.load_cert_chain(certfn, None)
        if isinstance(request, ssl.SSLSocket):
            request = SSLTransport(request, ssl_context=sslctx, server_side=True)
        else:
            request = sslctx.wrap_socket(request, server_side=True)
        super().__init__(request, *args, **kwargs)


class HTTPConnectProxyHandler(BaseHTTPRequestHandler, HTTPProxyAuthMixin):
    protocol_version = 'HTTP/1.1'
    default_request_version = 'HTTP/1.1'

    def __init__(self, *args, username=None, password=None, request_handler=None, **kwargs):
        self.username = username
        self.password = password
        self.request_handler = request_handler
        super().__init__(*args, **kwargs)

    def do_CONNECT(self):
        if not self.do_proxy_auth(self.username, self.password):
            self.server.close_request(self.request)
            return
        self.send_response(200)
        self.end_headers()
        proxy_info = {
            'client_address': self.client_address,
            'connect': True,
            'connect_host': self.path.split(':')[0],
            'connect_port': int(self.path.split(':')[1]),
            'headers': dict(self.headers),
            'path': self.path,
            'proxy': ':'.join(str(y) for y in self.connection.getsockname()),
        }
        self.request_handler(self.request, self.client_address, self.server, proxy_info=proxy_info)
        self.server.close_request(self.request)


class HTTPSConnectProxyHandler(HTTPConnectProxyHandler):
    def __init__(self, request, *args, **kwargs):
        certfn = os.path.join(TEST_DIR, 'testcert.pem')
        sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)
        sslctx.load_cert_chain(certfn, None)
        request = sslctx.wrap_socket(request, server_side=True)
        self._original_request = request
        super().__init__(request, *args, **kwargs)

    def do_CONNECT(self):
        super().do_CONNECT()
        self.server.close_request(self._original_request)


@contextlib.contextmanager
def proxy_server(proxy_server_class, request_handler, bind_ip=None, **proxy_server_kwargs):
    server = server_thread = None
    try:
        bind_address = bind_ip or '127.0.0.1'
        server_type = ThreadingTCPServer if '.' in bind_address else IPv6ThreadingTCPServer
        server = server_type(
            (bind_address, 0), functools.partial(proxy_server_class, request_handler=request_handler, **proxy_server_kwargs))
        server_port = http_server_port(server)
        server_thread = threading.Thread(target=server.serve_forever)
        server_thread.daemon = True
        server_thread.start()
        if '.' not in bind_address:
            yield f'[{bind_address}]:{server_port}'
        else:
            yield f'{bind_address}:{server_port}'
    finally:
        server.shutdown()
        server.server_close()
        server_thread.join(2.0)


class HTTPProxyTestContext(abc.ABC):
    REQUEST_HANDLER_CLASS = None
    REQUEST_PROTO = None

    def http_server(self, server_class, *args, **kwargs):
        return proxy_server(server_class, self.REQUEST_HANDLER_CLASS, *args, **kwargs)

    @abc.abstractmethod
    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict:
        """return a dict of proxy_info"""


class HTTPProxyHTTPTestContext(HTTPProxyTestContext):
    # Standard HTTP Proxy for http requests
    REQUEST_HANDLER_CLASS = HTTPProxyHandler
    REQUEST_PROTO = 'http'

    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
        request = Request(f'http://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs)
        handler.validate(request)
        return json.loads(handler.send(request).read().decode())


class HTTPProxyHTTPSTestContext(HTTPProxyTestContext):
    # HTTP Connect proxy, for https requests
    REQUEST_HANDLER_CLASS = HTTPSProxyHandler
    REQUEST_PROTO = 'https'

    def proxy_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs):
        request = Request(f'https://{target_domain or "127.0.0.1"}:{target_port or "40000"}/proxy_info', **req_kwargs)
        handler.validate(request)
        return json.loads(handler.send(request).read().decode())


CTX_MAP = {
    'http': HTTPProxyHTTPTestContext,
    'https': HTTPProxyHTTPSTestContext,
}


@pytest.fixture(scope='module')
def ctx(request):
    return CTX_MAP[request.param]()


@pytest.mark.parametrize(
    'handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True)
@pytest.mark.parametrize('ctx', ['http'], indirect=True)  # pure http proxy can only support http
class TestHTTPProxy:
    def test_http_no_auth(self, handler, ctx):
        with ctx.http_server(HTTPProxyHandler) as server_address:
            with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert proxy_info['connect'] is False
                assert 'Proxy-Authorization' not in proxy_info['headers']

    def test_http_auth(self, handler, ctx):
        with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address:
            with handler(proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert 'Proxy-Authorization' in proxy_info['headers']

    def test_http_bad_auth(self, handler, ctx):
        with ctx.http_server(HTTPProxyHandler, username='test', password='test') as server_address:
            with handler(proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh:
                with pytest.raises(HTTPError) as exc_info:
                    ctx.proxy_info_request(rh)
                assert exc_info.value.response.status == 407
                exc_info.value.response.close()

    def test_http_source_address(self, handler, ctx):
        with ctx.http_server(HTTPProxyHandler) as server_address:
            source_address = f'127.0.0.{random.randint(5, 255)}'
            verify_address_availability(source_address)
            with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'},
                         source_address=source_address) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert proxy_info['client_address'][0] == source_address

    @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
    def test_https(self, handler, ctx):
        with ctx.http_server(HTTPSProxyHandler) as server_address:
            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert proxy_info['connect'] is False
                assert 'Proxy-Authorization' not in proxy_info['headers']

    @pytest.mark.skip_handler('Urllib', 'urllib does not support https proxies')
    def test_https_verify_failed(self, handler, ctx):
        with ctx.http_server(HTTPSProxyHandler) as server_address:
            with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
                # Accept SSLError as may not be feasible to tell if it is proxy or request error.
                # note: if request proto also does ssl verification, this may also be the error of the request.
                # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases.
                with pytest.raises((ProxyError, SSLError)):
                    ctx.proxy_info_request(rh)

    def test_http_with_idn(self, handler, ctx):
        with ctx.http_server(HTTPProxyHandler) as server_address:
            with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh, target_domain='中文.tw')
                assert proxy_info['proxy'] == server_address
                assert proxy_info['path'].startswith('http://xn--fiq228c.tw')
                assert proxy_info['headers']['Host'].split(':', 1)[0] == 'xn--fiq228c.tw'


@pytest.mark.parametrize(
    'handler,ctx', [
        ('Requests', 'https'),
        ('CurlCFFI', 'https'),
    ], indirect=True)
class TestHTTPConnectProxy:
    def test_http_connect_no_auth(self, handler, ctx):
        with ctx.http_server(HTTPConnectProxyHandler) as server_address:
            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert proxy_info['connect'] is True
                assert 'Proxy-Authorization' not in proxy_info['headers']

    def test_http_connect_auth(self, handler, ctx):
        with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:test@{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert 'Proxy-Authorization' in proxy_info['headers']

    @pytest.mark.skip_handler(
        'Requests',
        'bug in urllib3 causes unclosed socket: https://github.com/urllib3/urllib3/issues/3374',
    )
    def test_http_connect_bad_auth(self, handler, ctx):
        with ctx.http_server(HTTPConnectProxyHandler, username='test', password='test') as server_address:
            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'http://test:bad@{server_address}'}) as rh:
                with pytest.raises(ProxyError):
                    ctx.proxy_info_request(rh)

    def test_http_connect_source_address(self, handler, ctx):
        with ctx.http_server(HTTPConnectProxyHandler) as server_address:
            source_address = f'127.0.0.{random.randint(5, 255)}'
            verify_address_availability(source_address)
            with handler(proxies={ctx.REQUEST_PROTO: f'http://{server_address}'},
                         source_address=source_address,
                         verify=False) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert proxy_info['client_address'][0] == source_address

    @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
    def test_https_connect_proxy(self, handler, ctx):
        with ctx.http_server(HTTPSConnectProxyHandler) as server_address:
            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert proxy_info['connect'] is True
                assert 'Proxy-Authorization' not in proxy_info['headers']

    @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
    def test_https_connect_verify_failed(self, handler, ctx):
        with ctx.http_server(HTTPSConnectProxyHandler) as server_address:
            with handler(verify=True, proxies={ctx.REQUEST_PROTO: f'https://{server_address}'}) as rh:
                # Accept SSLError as may not be feasible to tell if it is proxy or request error.
                # note: if request proto also does ssl verification, this may also be the error of the request.
                # Until we can support passing custom cacerts to handlers, we cannot properly test this for all cases.
                with pytest.raises((ProxyError, SSLError)):
                    ctx.proxy_info_request(rh)

    @pytest.mark.skipif(urllib3 is None, reason='requires urllib3 to test')
    def test_https_connect_proxy_auth(self, handler, ctx):
        with ctx.http_server(HTTPSConnectProxyHandler, username='test', password='test') as server_address:
            with handler(verify=False, proxies={ctx.REQUEST_PROTO: f'https://test:test@{server_address}'}) as rh:
                proxy_info = ctx.proxy_info_request(rh)
                assert proxy_info['proxy'] == server_address
                assert 'Proxy-Authorization' in proxy_info['headers']
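# A sketch reusing this file's helpers: proxy_server() can also spin up a
# throwaway authenticated HTTP proxy outside of pytest. Passing None as the
# request handler is sufficient for HTTPProxyHandler, which ignores that
# argument.
with proxy_server(HTTPProxyHandler, None, username='u', password='p') as address:
    print(f'test proxy listening at http://{address}')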
# File: yt-dlp-2024.09.27/test/test_iqiyi_sdk_interpreter.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import FakeYDL, is_download_test
from yt_dlp.extractor import IqiyiIE


class WarningLogger:
    def __init__(self):
        self.messages = []

    def warning(self, msg):
        self.messages.append(msg)

    def debug(self, msg):
        pass

    def error(self, msg):
        pass


@is_download_test
class TestIqiyiSDKInterpreter(unittest.TestCase):
    def test_iqiyi_sdk_interpreter(self):
        """
        Test the functionality of IqiyiSDKInterpreter by trying to log in

        If `sign` is incorrect, /validate call throws an HTTP 556 error
        """
        logger = WarningLogger()
        ie = IqiyiIE(FakeYDL({'logger': logger}))
        ie._perform_login('foo', 'bar')
        self.assertTrue('unable to log in:' in logger.messages[0])


if __name__ == '__main__':
    unittest.main()
yt-dlp-2024.09.27/test/test_jsinterp.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import math

from yt_dlp.jsinterp import JS_Undefined, JSInterpreter


class NaN:
    pass


class TestJSInterpreter(unittest.TestCase):
    def _test(self, jsi_or_code, expected, func='f', args=()):
        if isinstance(jsi_or_code, str):
            jsi_or_code = JSInterpreter(jsi_or_code)
        got = jsi_or_code.call_function(func, *args)
        if expected is NaN:
            self.assertTrue(math.isnan(got), f'{got} is not NaN')
        else:
            self.assertEqual(got, expected)

    def test_basic(self):
        jsi = JSInterpreter('function f(){;}')
        self.assertEqual(repr(jsi.extract_function('f')), 'F<f>')
        self._test(jsi, None)

        self._test('function f(){return 42;}', 42)
        self._test('function f(){42}', None)
        self._test('var f = function(){return 42;}', 42)

    def test_add(self):
        self._test('function f(){return 42 + 7;}', 49)
        self._test('function f(){return 42 + undefined;}', NaN)
        self._test('function f(){return 42 + null;}', 42)

    def test_sub(self):
        self._test('function f(){return 42 - 7;}', 35)
        self._test('function f(){return 42 - undefined;}', NaN)
        self._test('function f(){return 42 - null;}', 42)

    def test_mul(self):
        self._test('function f(){return 42 * 7;}', 294)
        self._test('function f(){return 42 * undefined;}', NaN)
        self._test('function f(){return 42 * null;}', 0)

    def test_div(self):
        jsi = JSInterpreter('function f(a, b){return a / b;}')
        self._test(jsi, NaN, args=(0, 0))
        self._test(jsi, NaN, args=(JS_Undefined, 1))
        self._test(jsi, float('inf'), args=(2, 0))
        self._test(jsi, 0, args=(0, 3))

    def test_mod(self):
        self._test('function f(){return 42 % 7;}', 0)
        self._test('function f(){return 42 % 0;}', NaN)
        self._test('function f(){return 42 % undefined;}', NaN)

    def test_exp(self):
        self._test('function f(){return 42 ** 2;}', 1764)
        self._test('function f(){return 42 ** undefined;}', NaN)
        self._test('function f(){return 42 ** null;}', 1)
        self._test('function f(){return undefined ** 42;}', NaN)

    def test_calc(self):
        self._test('function f(a){return 2*a+1;}', 7, args=[3])

    def test_empty_return(self):
        self._test('function f(){return; y()}', None)

    def test_morespace(self):
        self._test('function f (a) { return 2 * a + 1 ; }', 7, args=[3])
        self._test('function f () { x = 2 ; return x; }', 2)

    def test_strange_chars(self):
        self._test('function $_xY1 ($_axY1) { var $_axY2 = $_axY1 + 1; return $_axY2; }',
                   21, args=[20], func='$_xY1')

    def test_operators(self):
        self._test('function f(){return 1 << 5;}', 32)
        self._test('function f(){return 2 ** 5}', 32)
        self._test('function f(){return 19 & 21;}', 17)
        self._test('function f(){return 11 >> 2;}', 2)
        self._test('function f(){return []? 2+3: 4;}', 5)
        self._test('function f(){return 1 == 2}', False)
        self._test('function f(){return 0 && 1 || 2;}', 2)
        self._test('function f(){return 0 ??
42;}', 0) self._test('function f(){return "life, the universe and everything" < 42;}', False) self._test('function f(){return 0 - 7 * - 6;}', 42) def test_array_access(self): self._test('function f(){var x = [1,2,3]; x[0] = 4; x[0] = 5; x[2.0] = 7; return x;}', [5, 2, 7]) def test_parens(self): self._test('function f(){return (1) + (2) * ((( (( (((((3)))))) )) ));}', 7) self._test('function f(){return (1 + 2) * 3;}', 9) def test_quotes(self): self._test(R'function f(){return "a\"\\("}', R'a"\(') def test_assignments(self): self._test('function f(){var x = 20; x = 30 + 1; return x;}', 31) self._test('function f(){var x = 20; x += 30 + 1; return x;}', 51) self._test('function f(){var x = 20; x -= 30 + 1; return x;}', -11) @unittest.skip('Not implemented') def test_comments(self): self._test(''' function f() { var x = /* 1 + */ 2; var y = /* 30 * 40 */ 50; return x + y; } ''', 52) self._test(''' function f() { var x = "/*"; var y = 1 /* comment */ + 2; return y; } ''', 3) def test_precedence(self): self._test(''' function f() { var a = [10, 20, 30, 40, 50]; var b = 6; a[0]=a[b%a.length]; return a; } ''', [20, 20, 30, 40, 50]) def test_builtins(self): self._test('function f() { return NaN }', NaN) def test_date(self): self._test('function f() { return new Date("Wednesday 31 December 1969 18:01:26 MDT") - 0; }', 86000) jsi = JSInterpreter('function f(dt) { return new Date(dt) - 0; }') self._test(jsi, 86000, args=['Wednesday 31 December 1969 18:01:26 MDT']) self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT']) # m/d/y self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC']) def test_call(self): jsi = JSInterpreter(''' function x() { return 2; } function y(a) { return x() + (a?a:0); } function z() { return y(3); } ''') self._test(jsi, 5, func='z') self._test(jsi, 2, func='y') def test_if(self): self._test(''' function f() { let a = 9; if (0==0) {a++} return a } ''', 10) self._test(''' function f() { if (0==0) {return 10} } ''', 10) self._test(''' function f() { if (0!=0) {return 1} else {return 10} } ''', 10) """ # Unsupported self._test(''' function f() { if (0!=0) {return 1} else if (1==0) {return 2} else {return 10} } ''', 10) """ def test_for_loop(self): self._test('function f() { a=0; for (i=0; i-10; i++) {a++} return a }', 10) def test_switch(self): jsi = JSInterpreter(''' function f(x) { switch(x){ case 1:x+=1; case 2:x+=2; case 3:x+=3;break; case 4:x+=4; default:x=0; } return x } ''') self._test(jsi, 7, args=[1]) self._test(jsi, 6, args=[3]) self._test(jsi, 0, args=[5]) def test_switch_default(self): jsi = JSInterpreter(''' function f(x) { switch(x){ case 2: x+=2; default: x-=1; case 5: case 6: x+=6; case 0: break; case 1: x+=1; } return x } ''') self._test(jsi, 2, args=[1]) self._test(jsi, 11, args=[5]) self._test(jsi, 14, args=[9]) def test_try(self): self._test('function f() { try{return 10} catch(e){return 5} }', 10) def test_catch(self): self._test('function f() { try{throw 10} catch(e){return 5} }', 5) def test_finally(self): self._test('function f() { try{throw 10} finally {return 42} }', 42) self._test('function f() { try{throw 10} catch(e){return 5} finally {return 42} }', 42) def test_nested_try(self): self._test(''' function f() {try { try{throw 10} finally {throw 42} } catch(e){return 5} } ''', 5) def test_for_loop_continue(self): self._test('function f() { a=0; for (i=0; i-10; i++) { continue; a++ } return a }', 0) def test_for_loop_break(self): self._test('function f() { a=0; for (i=0; i-10; i++) { break; a++ } return a }', 0) def test_for_loop_try(self): self._test(''' 
function f() { for (i=0; i-10; i++) { try { if (i == 5) throw i} catch {return 10} finally {break} }; return 42 } ''', 42) def test_literal_list(self): self._test('function f() { return [1, 2, "asdf", [5, 6, 7]][3] }', [5, 6, 7]) def test_comma(self): self._test('function f() { a=5; a -= 1, a+=3; return a }', 7) self._test('function f() { a=5; return (a -= 1, a+=3, a); }', 7) self._test('function f() { return (l=[0,1,2,3], function(a, b){return a+b})((l[1], l[2]), l[3]) }', 5) def test_void(self): self._test('function f() { return void 42; }', None) def test_return_function(self): jsi = JSInterpreter(''' function f() { return [1, function(){return 1}][1] } ''') self.assertEqual(jsi.call_function('f')([]), 1) def test_null(self): self._test('function f() { return null; }', None) self._test('function f() { return [null > 0, null < 0, null == 0, null === 0]; }', [False, False, False, False]) self._test('function f() { return [null >= 0, null <= 0]; }', [True, True]) def test_undefined(self): self._test('function f() { return undefined === undefined; }', True) self._test('function f() { return undefined; }', JS_Undefined) self._test('function f() {return undefined ?? 42; }', 42) self._test('function f() { let v; return v; }', JS_Undefined) self._test('function f() { let v; return v**0; }', 1) self._test('function f() { let v; return [v>42, v<=42, v&&42, 42&&v]; }', [False, False, JS_Undefined, JS_Undefined]) self._test(''' function f() { return [ undefined === undefined, undefined == undefined, undefined == null, undefined < undefined, undefined > undefined, undefined === 0, undefined == 0, undefined < 0, undefined > 0, undefined >= 0, undefined <= 0, undefined > null, undefined < null, undefined === null ]; } ''', list(map(bool, (1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)))) jsi = JSInterpreter(''' function f() { let v; return [42+v, v+42, v**42, 42**v, 0**v]; } ''') for y in jsi.call_function('f'): self.assertTrue(math.isnan(y)) def test_object(self): self._test('function f() { return {}; }', {}) self._test('function f() { let a = {m1: 42, m2: 0 }; return [a["m1"], a.m2]; }', [42, 0]) self._test('function f() { let a; return a?.qq; }', JS_Undefined) self._test('function f() { let a = {m1: 42, m2: 0 }; return a?.qq; }', JS_Undefined) def test_regex(self): self._test('function f() { let a=/,,[/,913,/](,)}/; }', None) self._test('function f() { let a=/,,[/,913,/](,)}/; return a; }', R'/,,[/,913,/](,)}/0') R''' # We are not compiling regex jsi = JSInterpreter('function f() { let a=/,,[/,913,/](,)}/; return a; }') self.assertIsInstance(jsi.call_function('f'), re.Pattern) jsi = JSInterpreter('function f() { let a=/,,[/,913,/](,)}/i; return a; }') self.assertEqual(jsi.call_function('f').flags & re.I, re.I) jsi = JSInterpreter(R'function f() { let a=/,][}",],()}(\[)/; return a; }') self.assertEqual(jsi.call_function('f').pattern, r',][}",],()}(\[)') jsi = JSInterpreter(R'function f() { let a=[/[)\\]/]; return a[0]; }') self.assertEqual(jsi.call_function('f').pattern, r'[)\\]') ''' @unittest.skip('Not implemented') def test_replace(self): self._test('function f() { let a="data-name".replace("data-", ""); return a }', 'name') self._test('function f() { let a="data-name".replace(new RegExp("^.+-"), ""); return a; }', 'name') self._test('function f() { let a="data-name".replace(/^.+-/, ""); return a; }', 'name') self._test('function f() { let a="data-name".replace(/a/g, "o"); return a; }', 'doto-nome') self._test('function f() { let a="data-name".replaceAll("a", "o"); return a; }', 'doto-nome') def 
test_char_code_at(self): jsi = JSInterpreter('function f(i){return "test".charCodeAt(i)}') self._test(jsi, 116, args=[0]) self._test(jsi, 101, args=[1]) self._test(jsi, 115, args=[2]) self._test(jsi, 116, args=[3]) self._test(jsi, None, args=[4]) self._test(jsi, 116, args=['not_a_number']) def test_bitwise_operators_overflow(self): self._test('function f(){return -524999584 << 5}', 379882496) self._test('function f(){return 1236566549 << 5}', 915423904) def test_bitwise_operators_typecast(self): self._test('function f(){return null << 5}', 0) self._test('function f(){return undefined >> 5}', 0) self._test('function f(){return 42 << NaN}', 42) def test_negative(self): self._test('function f(){return 2 * -2.0 ;}', -4) self._test('function f(){return 2 - - -2 ;}', 0) self._test('function f(){return 2 - - - -2 ;}', 4) self._test('function f(){return 2 - + + - -2;}', 0) self._test('function f(){return 2 + - + - -2;}', 0) @unittest.skip('Not implemented') def test_packed(self): jsi = JSInterpreter('''function f(p,a,c,k,e,d){while(c--)if(k[c])p=p.replace(new RegExp('\\b'+c.toString(a)+'\\b','g'),k[c]);return p}''') self.assertEqual(jsi.call_function('f', '''h 7=g("1j");7.7h({7g:[{33:"w://7f-7e-7d-7c.v.7b/7a/79/78/77/76.74?t=73&s=2s&e=72&f=2t&71=70.0.0.1&6z=6y&6x=6w"}],6v:"w://32.v.u/6u.31",16:"r%",15:"r%",6t:"6s",6r:"",6q:"l",6p:"l",6o:"6n",6m:\'6l\',6k:"6j",9:[{33:"/2u?b=6i&n=50&6h=w://32.v.u/6g.31",6f:"6e"}],1y:{6d:1,6c:\'#6b\',6a:\'#69\',68:"67",66:30,65:r,},"64":{63:"%62 2m%m%61%5z%5y%5x.u%5w%5v%5u.2y%22 2k%m%1o%22 5t%m%1o%22 5s%m%1o%22 2j%m%5r%22 16%m%5q%22 15%m%5p%22 5o%2z%5n%5m%2z",5l:"w://v.u/d/1k/5k.2y",5j:[]},\'5i\':{"5h":"5g"},5f:"5e",5d:"w://v.u",5c:{},5b:l,1x:[0.25,0.50,0.75,1,1.25,1.5,2]});h 1m,1n,5a;h 59=0,58=0;h 7=g("1j");h 2x=0,57=0,56=0;$.55({54:{\'53-52\':\'2i-51\'}});7.j(\'4z\',6(x){c(5>0&&x.1l>=5&&1n!=1){1n=1;$(\'q.4y\').4x(\'4w\')}});7.j(\'13\',6(x){2x=x.1l});7.j(\'2g\',6(x){2w(x)});7.j(\'4v\',6(){$(\'q.2v\').4u()});6 2w(x){$(\'q.2v\').4t();c(1m)19;1m=1;17=0;c(4s.4r===l){17=1}$.4q(\'/2u?b=4p&2l=1k&4o=2t-4n-4m-2s-4l&4k=&4j=&4i=&17=\'+17,6(2r){$(\'#4h\').4g(2r)});$(\'.3-8-4f-4e:4d("4c")\').2h(6(e){2q();g().4b(0);g().4a(l)});6 2q(){h $14=$("<q />").2p({1l:"49",16:"r%",15:"r%",48:0,2n:0,2o:47,46:"45(10%, 10%, 10%, 0.4)","44-43":"42"});$("<41 />").2p({16:"60%",15:"60%",2o:40,"3z-2n":"3y"}).3x({\'2m\':\'/?b=3w&2l=1k\',\'2k\':\'0\',\'2j\':\'2i\'}).2f($14);$14.2h(6(){$(3v).3u();g().2g()});$14.2f($(\'#1j\'))}g().13(0);}6 3t(){h 9=7.1b(2e);2d.2c(9);c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==2e){2d.2c(\'!!=\'+i);7.1p(i)}}}}7.j(\'3s\',6(){g().1h("/2a/3r.29","3q 10 28",6(){g().13(g().27()+10)},"2b");$("q[26=2b]").23().21(\'.3-20-1z\');g().1h("/2a/3p.29","3o 10 28",6(){h 12=g().27()-10;c(12<0)12=0;g().13(12)},"24");$("q[26=24]").23().21(\'.3-20-1z\');});6 1i(){}7.j(\'3n\',6(){1i()});7.j(\'3m\',6(){1i()});7.j("k",6(y){h 9=7.1b();c(9.n<2)19;$(\'.3-8-3l-3k\').3j(6(){$(\'#3-8-a-k\').1e(\'3-8-a-z\');$(\'.3-a-k\').p(\'o-1f\',\'11\')});7.1h("/3i/3h.3g","3f 3e",6(){$(\'.3-1w\').3d(\'3-8-1v\');$(\'.3-8-1y, .3-8-1x\').p(\'o-1g\',\'11\');c($(\'.3-1w\').3c(\'3-8-1v\')){$(\'.3-a-k\').p(\'o-1g\',\'l\');$(\'.3-a-k\').p(\'o-1f\',\'l\');$(\'.3-8-a\').1e(\'3-8-a-z\');$(\'.3-8-a:1u\').3b(\'3-8-a-z\')}3a{$(\'.3-a-k\').p(\'o-1g\',\'11\');$(\'.3-a-k\').p(\'o-1f\',\'11\');$(\'.3-8-a:1u\').1e(\'3-8-a-z\')}},"39");7.j("38",6(y){1d.37(\'1c\',y.9[y.36].1a)});c(1d.1t(\'1c\')){35("1s(1d.1t(\'1c\'));",34)}});h 18;6 1s(1q){h 
9=7.1b();c(9.n>1){1r(i=0;i<9.n;i++){c(9[i].1a==1q){c(i==18){19}18=i;7.1p(i)}}}}',36,270,'|||jw|||function|player|settings|tracks|submenu||if||||jwplayer|var||on|audioTracks|true|3D|length|aria|attr|div|100|||sx|filemoon|https||event|active||false|tt|seek|dd|height|width|adb|current_audio|return|name|getAudioTracks|default_audio|localStorage|removeClass|expanded|checked|addButton|callMeMaybe|vplayer|0fxcyc2ajhp1|position|vvplay|vvad|220|setCurrentAudioTrack|audio_name|for|audio_set|getItem|last|open|controls|playbackRates|captions|rewind|icon|insertAfter||detach|ff00||button|getPosition|sec|png|player8|ff11|log|console|track_name|appendTo|play|click|no|scrolling|frameborder|file_code|src|top|zIndex|css|showCCform|data|1662367683|383371|dl|video_ad|doPlay|prevt|mp4|3E||jpg|thumbs|file|300|setTimeout|currentTrack|setItem|audioTrackChanged|dualSound|else|addClass|hasClass|toggleClass|Track|Audio|svg|dualy|images|mousedown|buttons|topbar|playAttemptFailed|beforePlay|Rewind|fr|Forward|ff|ready|set_audio_track|remove|this|upload_srt|prop|50px|margin|1000001|iframe|center|align|text|rgba|background|1000000|left|absolute|pause|setCurrentCaptions|Upload|contains|item|content|html|fviews|referer|prem|embed|3e57249ef633e0d03bf76ceb8d8a4b65|216|83|hash|view|get|TokenZir|window|hide|show|complete|slow|fadeIn|video_ad_fadein|time||cache|Cache|Content|headers|ajaxSetup|v2done|tott|vastdone2|vastdone1|vvbefore|playbackRateControls|cast|aboutlink|FileMoon|abouttext|UHD|1870|qualityLabels|sites|GNOME_POWER|link|2Fiframe|3C|allowfullscreen|22360|22640|22no|marginheight|marginwidth|2FGNOME_POWER|2F0fxcyc2ajhp1|2Fe|2Ffilemoon|2F|3A||22https|3Ciframe|code|sharing|fontOpacity|backgroundOpacity|Tahoma|fontFamily|303030|backgroundColor|FFFFFF|color|userFontScale|thumbnails|kind|0fxcyc2ajhp10000|url|get_slides|start|startparam|none|preload|html5|primary|hlshtml|androidhls|duration|uniform|stretching|0fxcyc2ajhp1_xt|image|2048|sp|6871|asn|127|srv|43200|_g3XlBcu2lmD9oDexD2NLWSmah2Nu3XcDrl93m9PwXY|m3u8||master|0fxcyc2ajhp1_x|00076|01|hls2|to|s01|delivery|storage|moon|sources|setup'''.split('|'))) def test_join(self): test_input = list('test') tests = [ 'function f(a, b){return a.join(b)}', 'function f(a, b){return Array.prototype.join.call(a, b)}', 'function f(a, b){return Array.prototype.join.apply(a, [b])}', ] for test in tests: jsi = JSInterpreter(test) self._test(jsi, 'test', args=[test_input, '']) self._test(jsi, 't-e-s-t', args=[test_input, '-']) self._test(jsi, '', args=[[], '-']) def test_split(self): test_result = list('test') tests = [ 'function f(a, b){return a.split(b)}', 'function f(a, b){return String.prototype.split.call(a, b)}', 'function f(a, b){return String.prototype.split.apply(a, [b])}', ] for test in tests: jsi = JSInterpreter(test) self._test(jsi, test_result, args=['test', '']) self._test(jsi, test_result, args=['t-e-s-t', '-']) self._test(jsi, [''], args=['', '-']) self._test(jsi, [], args=['', '']) def test_slice(self): self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice()}', [0, 1, 2, 3, 4, 5, 6, 7, 8]) self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0)}', [0, 1, 2, 3, 4, 5, 6, 7, 8]) self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(5)}', [5, 6, 7, 8]) self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(99)}', []) self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-2)}', [7, 8]) self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-99)}', [0, 1, 2, 3, 4, 5, 6, 7, 8]) self._test('function f(){return [0, 
1, 2, 3, 4, 5, 6, 7, 8].slice(0, 0)}', [])
        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, 0)}', [])
        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(0, 1)}', [0])
        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(3, 6)}', [3, 4, 5])
        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(1, -1)}', [1, 2, 3, 4, 5, 6, 7])
        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-1, 1)}', [])
        self._test('function f(){return [0, 1, 2, 3, 4, 5, 6, 7, 8].slice(-3, -1)}', [6, 7])
        self._test('function f(){return "012345678".slice()}', '012345678')
        self._test('function f(){return "012345678".slice(0)}', '012345678')
        self._test('function f(){return "012345678".slice(5)}', '5678')
        self._test('function f(){return "012345678".slice(99)}', '')
        self._test('function f(){return "012345678".slice(-2)}', '78')
        self._test('function f(){return "012345678".slice(-99)}', '012345678')
        self._test('function f(){return "012345678".slice(0, 0)}', '')
        self._test('function f(){return "012345678".slice(1, 0)}', '')
        self._test('function f(){return "012345678".slice(0, 1)}', '0')
        self._test('function f(){return "012345678".slice(3, 6)}', '345')
        self._test('function f(){return "012345678".slice(1, -1)}', '1234567')
        self._test('function f(){return "012345678".slice(-1, 1)}', '')
        self._test('function f(){return "012345678".slice(-3, -1)}', '67')


if __name__ == '__main__':
    unittest.main()

yt-dlp-2024.09.27/test/test_netrc.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from yt_dlp.extractor import gen_extractor_classes
from yt_dlp.extractor.common import InfoExtractor

NO_LOGIN = InfoExtractor._perform_login


class TestNetRc(unittest.TestCase):
    def test_netrc_present(self):
        for ie in gen_extractor_classes():
            if ie._perform_login is NO_LOGIN:
                continue
            self.assertTrue(
                ie._NETRC_MACHINE,
                f'Extractor {ie.IE_NAME} supports login, but is missing a _NETRC_MACHINE property')


if __name__ == '__main__':
    unittest.main()
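# --- Editor's addition: illustrative sketch, not part of the yt-dlp repository ---------
# The test above enforces that every extractor overriding _perform_login declares a
# _NETRC_MACHINE, since that is the key used to look credentials up in the user's
# ~/.netrc. A minimal sketch of that lookup using only the standard library; the
# machine name 'example' is hypothetical.
import netrc
import os


def lookup_netrc(machine='example'):
    """Return (username, password) for `machine` from ~/.netrc, or None if absent."""
    try:
        info = netrc.netrc(os.path.expanduser('~/.netrc')).authenticators(machine)
    except (FileNotFoundError, netrc.NetrcParseError):
        return None
    if info is None:
        return None
    username, _account, password = info  # .netrc entries are (login, account, password)
    return username, password
# ----------------------------------------------------------------------------------------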
yt-dlp-2024.09.27/test/test_networking.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys

import pytest

from yt_dlp.networking.common import Features, DEFAULT_TIMEOUT

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import gzip
import http.client
import http.cookiejar
import http.server
import io
import logging
import pathlib
import random
import ssl
import tempfile
import threading
import time
import urllib.error
import urllib.request
import warnings
import zlib
from email.message import Message
from http.cookiejar import CookieJar

from test.helper import (
    FakeYDL,
    http_server_port,
    validate_and_send,
    verify_address_availability,
)
from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import brotli, curl_cffi, requests, urllib3
from yt_dlp.networking import (
    HEADRequest,
    PUTRequest,
    Request,
    RequestDirector,
    RequestHandler,
    Response,
)
from yt_dlp.networking._urllib import UrllibRH
from yt_dlp.networking.exceptions import (
    CertificateVerifyError,
    HTTPError,
    IncompleteRead,
    NoSupportingHandlers,
    ProxyError,
    RequestError,
    SSLError,
    TransportError,
    UnsupportedRequest,
)
from yt_dlp.networking.impersonate import (
    ImpersonateRequestHandler,
    ImpersonateTarget,
)
from yt_dlp.utils import YoutubeDLError
from yt_dlp.utils._utils import _YDLLogger as FakeLogger
from yt_dlp.utils.networking import HTTPHeaderDict, std_headers

TEST_DIR = os.path.dirname(os.path.abspath(__file__))


class HTTPTestRequestHandler(http.server.BaseHTTPRequestHandler):
    protocol_version = 'HTTP/1.1'
    default_request_version = 'HTTP/1.1'

    def log_message(self, format, *args):
        pass

    def _headers(self):
        payload = str(self.headers).encode()
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _redirect(self):
        self.send_response(int(self.path[len('/redirect_'):]))
        self.send_header('Location', '/method')
        self.send_header('Content-Length', '0')
        self.end_headers()

    def _method(self, method, payload=None):
        self.send_response(200)
        self.send_header('Content-Length', str(len(payload or '')))
        self.send_header('Method', method)
        self.end_headers()
        if payload:
            self.wfile.write(payload)

    def _status(self, status):
        payload = f'<html>{status} NOT FOUND</html>'.encode()
        self.send_response(int(status))
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.send_header('Content-Length', str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _read_data(self):
        if 'Content-Length' in self.headers:
            return self.rfile.read(int(self.headers['Content-Length']))
        else:
            return b''

    def do_POST(self):
        data = self._read_data() + str(self.headers).encode()
        if
self.path.startswith('/redirect_'): self._redirect() elif self.path.startswith('/method'): self._method('POST', data) elif self.path.startswith('/headers'): self._headers() else: self._status(404) def do_HEAD(self): if self.path.startswith('/redirect_'): self._redirect() elif self.path.startswith('/method'): self._method('HEAD') else: self._status(404) def do_PUT(self): data = self._read_data() + str(self.headers).encode() if self.path.startswith('/redirect_'): self._redirect() elif self.path.startswith('/method'): self._method('PUT', data) else: self._status(404) def do_GET(self): if self.path == '/video.html': payload = b'<html><video src="/vid.mp4" /></html>' self.send_response(200) self.send_header('Content-Type', 'text/html; charset=utf-8') self.send_header('Content-Length', str(len(payload))) self.end_headers() self.wfile.write(payload) elif self.path == '/vid.mp4': payload = b'\x00\x00\x00\x00\x20\x66\x74[video]' self.send_response(200) self.send_header('Content-Type', 'video/mp4') self.send_header('Content-Length', str(len(payload))) self.end_headers() self.wfile.write(payload) elif self.path == '/%E4%B8%AD%E6%96%87.html': payload = b'<html><video src="/vid.mp4" /></html>' self.send_response(200) self.send_header('Content-Type', 'text/html; charset=utf-8') self.send_header('Content-Length', str(len(payload))) self.end_headers() self.wfile.write(payload) elif self.path == '/%c7%9f': payload = b'<html><video src="/vid.mp4" /></html>' self.send_response(200) self.send_header('Content-Type', 'text/html; charset=utf-8') self.send_header('Content-Length', str(len(payload))) self.end_headers() self.wfile.write(payload) elif self.path.startswith('/redirect_loop'): self.send_response(301) self.send_header('Location', self.path) self.send_header('Content-Length', '0') self.end_headers() elif self.path == '/redirect_dotsegments': self.send_response(301) # redirect to /headers but with dot segments before self.send_header('Location', '/a/b/./../../headers') self.send_header('Content-Length', '0') self.end_headers() elif self.path == '/redirect_dotsegments_absolute': self.send_response(301) # redirect to /headers but with dot segments before - absolute url self.send_header('Location', f'http://127.0.0.1:{http_server_port(self.server)}/a/b/./../../headers') self.send_header('Content-Length', '0') self.end_headers() elif self.path.startswith('/redirect_'): self._redirect() elif self.path.startswith('/method'): self._method('GET', str(self.headers).encode()) elif self.path.startswith('/headers'): self._headers() elif self.path.startswith('/308-to-headers'): self.send_response(308) # redirect to "localhost" for testing cookie redirection handling self.send_header('Location', f'http://localhost:{self.connection.getsockname()[1]}/headers') self.send_header('Content-Length', '0') self.end_headers() elif self.path == '/trailing_garbage': payload = b'<html><video src="/vid.mp4" /></html>' self.send_response(200) self.send_header('Content-Type', 'text/html; charset=utf-8') self.send_header('Content-Encoding', 'gzip') buf = io.BytesIO() with gzip.GzipFile(fileobj=buf, mode='wb') as f: f.write(payload) compressed = buf.getvalue() + b'trailing garbage' self.send_header('Content-Length', str(len(compressed))) self.end_headers() self.wfile.write(compressed) elif self.path == '/302-non-ascii-redirect': new_url = f'http://127.0.0.1:{http_server_port(self.server)}/中文.html' self.send_response(301) self.send_header('Location', new_url) self.send_header('Content-Length', '0') self.end_headers() elif self.path == 
'/content-encoding': encodings = self.headers.get('ytdl-encoding', '') payload = b'<html><video src="/vid.mp4" /></html>' for encoding in filter(None, (e.strip() for e in encodings.split(','))): if encoding == 'br' and brotli: payload = brotli.compress(payload) elif encoding == 'gzip': buf = io.BytesIO() with gzip.GzipFile(fileobj=buf, mode='wb') as f: f.write(payload) payload = buf.getvalue() elif encoding == 'deflate': payload = zlib.compress(payload) elif encoding == 'unsupported': payload = b'raw' break else: self._status(415) return self.send_response(200) self.send_header('Content-Encoding', encodings) self.send_header('Content-Length', str(len(payload))) self.end_headers() self.wfile.write(payload) elif self.path.startswith('/gen_'): payload = b'<html></html>' self.send_response(int(self.path[len('/gen_'):])) self.send_header('Content-Type', 'text/html; charset=utf-8') self.send_header('Content-Length', str(len(payload))) self.end_headers() self.wfile.write(payload) elif self.path.startswith('/incompleteread'): payload = b'<html></html>' self.send_response(200) self.send_header('Content-Type', 'text/html; charset=utf-8') self.send_header('Content-Length', '234234') self.end_headers() self.wfile.write(payload) self.finish() elif self.path.startswith('/timeout_'): time.sleep(int(self.path[len('/timeout_'):])) self._headers() elif self.path == '/source_address': payload = str(self.client_address[0]).encode() self.send_response(200) self.send_header('Content-Type', 'text/html; charset=utf-8') self.send_header('Content-Length', str(len(payload))) self.end_headers() self.wfile.write(payload) self.finish() elif self.path == '/get_cookie': self.send_response(200) self.send_header('Set-Cookie', 'test=ytdlp; path=/') self.end_headers() self.finish() else: self._status(404) def send_header(self, keyword, value): """ Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers. This is against what is defined in RFC 3986, however we need to test we support this since some sites incorrectly do this. 
""" if keyword.lower() == 'connection': return super().send_header(keyword, value) if not hasattr(self, '_headers_buffer'): self._headers_buffer = [] self._headers_buffer.append(f'{keyword}: {value}\r\n'.encode()) class TestRequestHandlerBase: @classmethod def setup_class(cls): cls.http_httpd = http.server.ThreadingHTTPServer( ('127.0.0.1', 0), HTTPTestRequestHandler) cls.http_port = http_server_port(cls.http_httpd) cls.http_server_thread = threading.Thread(target=cls.http_httpd.serve_forever) # FIXME: we should probably stop the http server thread after each test # See: https://github.com/yt-dlp/yt-dlp/pull/7094#discussion_r1199746041 cls.http_server_thread.daemon = True cls.http_server_thread.start() # HTTPS server certfn = os.path.join(TEST_DIR, 'testcert.pem') cls.https_httpd = http.server.ThreadingHTTPServer( ('127.0.0.1', 0), HTTPTestRequestHandler) sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) sslctx.load_cert_chain(certfn, None) cls.https_httpd.socket = sslctx.wrap_socket(cls.https_httpd.socket, server_side=True) cls.https_port = http_server_port(cls.https_httpd) cls.https_server_thread = threading.Thread(target=cls.https_httpd.serve_forever) cls.https_server_thread.daemon = True cls.https_server_thread.start() @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) class TestHTTPRequestHandler(TestRequestHandlerBase): def test_verify_cert(self, handler): with handler() as rh: with pytest.raises(CertificateVerifyError): validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers')) with handler(verify=False) as rh: r = validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers')) assert r.status == 200 r.close() def test_ssl_error(self, handler): # HTTPS server with too old TLS version # XXX: is there a better way to test this than to create a new server? https_httpd = http.server.ThreadingHTTPServer( ('127.0.0.1', 0), HTTPTestRequestHandler) sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True) https_port = http_server_port(https_httpd) https_server_thread = threading.Thread(target=https_httpd.serve_forever) https_server_thread.daemon = True https_server_thread.start() with handler(verify=False) as rh: with pytest.raises(SSLError, match=r'(?i)ssl(?:v3|/tls).alert.handshake.failure') as exc_info: validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers')) assert not issubclass(exc_info.type, CertificateVerifyError) @pytest.mark.skip_handler('CurlCFFI', 'legacy_ssl ignored by CurlCFFI') def test_legacy_ssl_extension(self, handler): # HTTPS server with old ciphers # XXX: is there a better way to test this than to create a new server? 
https_httpd = http.server.ThreadingHTTPServer( ('127.0.0.1', 0), HTTPTestRequestHandler) sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) sslctx.maximum_version = ssl.TLSVersion.TLSv1_2 sslctx.set_ciphers('SHA1:AESCCM:aDSS:eNULL:aNULL') sslctx.load_cert_chain(os.path.join(TEST_DIR, 'testcert.pem'), None) https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True) https_port = http_server_port(https_httpd) https_server_thread = threading.Thread(target=https_httpd.serve_forever) https_server_thread.daemon = True https_server_thread.start() with handler(verify=False) as rh: res = validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers', extensions={'legacy_ssl': True})) assert res.status == 200 res.close() # Ensure only applies to request extension with pytest.raises(SSLError): validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers')) @pytest.mark.skip_handler('CurlCFFI', 'legacy_ssl ignored by CurlCFFI') def test_legacy_ssl_support(self, handler): # HTTPS server with old ciphers # XXX: is there a better way to test this than to create a new server? https_httpd = http.server.ThreadingHTTPServer( ('127.0.0.1', 0), HTTPTestRequestHandler) sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) sslctx.maximum_version = ssl.TLSVersion.TLSv1_2 sslctx.set_ciphers('SHA1:AESCCM:aDSS:eNULL:aNULL') sslctx.load_cert_chain(os.path.join(TEST_DIR, 'testcert.pem'), None) https_httpd.socket = sslctx.wrap_socket(https_httpd.socket, server_side=True) https_port = http_server_port(https_httpd) https_server_thread = threading.Thread(target=https_httpd.serve_forever) https_server_thread.daemon = True https_server_thread.start() with handler(verify=False, legacy_ssl_support=True) as rh: res = validate_and_send(rh, Request(f'https://127.0.0.1:{https_port}/headers')) assert res.status == 200 res.close() def test_percent_encode(self, handler): with handler() as rh: # Unicode characters should be encoded with uppercase percent-encoding res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/中文.html')) assert res.status == 200 res.close() # don't normalize existing percent encodings res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/%c7%9f')) assert res.status == 200 res.close() @pytest.mark.parametrize('path', [ '/a/b/./../../headers', '/redirect_dotsegments', # https://github.com/yt-dlp/yt-dlp/issues/9020 '/redirect_dotsegments_absolute', ]) def test_remove_dot_segments(self, handler, path): with handler(verbose=True) as rh: # This isn't a comprehensive test, # but it should be enough to check whether the handler is removing dot segments in required scenarios res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}{path}')) assert res.status == 200 assert res.url == f'http://127.0.0.1:{self.http_port}/headers' res.close() @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi (non-standard)') def test_unicode_path_redirection(self, handler): with handler() as rh: r = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/302-non-ascii-redirect')) assert r.url == f'http://127.0.0.1:{self.http_port}/%E4%B8%AD%E6%96%87.html' r.close() def test_raise_http_error(self, handler): with handler() as rh: for bad_status in (400, 500, 599, 302): with pytest.raises(HTTPError): validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_{bad_status}')) # Should not raise an error validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')).close() def test_response_url(self, handler): with 
handler() as rh: # Response url should be that of the last url in redirect chain res = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_301')) assert res.url == f'http://127.0.0.1:{self.http_port}/method' res.close() res2 = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_200')) assert res2.url == f'http://127.0.0.1:{self.http_port}/gen_200' res2.close() # Covers some basic cases we expect some level of consistency between request handlers for @pytest.mark.parametrize('redirect_status,method,expected', [ # A 303 must either use GET or HEAD for subsequent request (303, 'POST', ('', 'GET', False)), (303, 'HEAD', ('', 'HEAD', False)), # 301 and 302 turn POST only into a GET (301, 'POST', ('', 'GET', False)), (301, 'HEAD', ('', 'HEAD', False)), (302, 'POST', ('', 'GET', False)), (302, 'HEAD', ('', 'HEAD', False)), # 307 and 308 should not change method (307, 'POST', ('testdata', 'POST', True)), (308, 'POST', ('testdata', 'POST', True)), (307, 'HEAD', ('', 'HEAD', False)), (308, 'HEAD', ('', 'HEAD', False)), ]) def test_redirect(self, handler, redirect_status, method, expected): with handler() as rh: data = b'testdata' if method == 'POST' else None headers = {} if data is not None: headers['Content-Type'] = 'application/test' res = validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_{redirect_status}', method=method, data=data, headers=headers)) headers = b'' data_recv = b'' if data is not None: data_recv += res.read(len(data)) if data_recv != data: headers += data_recv data_recv = b'' headers += res.read() assert expected[0] == data_recv.decode() assert expected[1] == res.headers.get('method') assert expected[2] == ('content-length' in headers.decode().lower()) def test_request_cookie_header(self, handler): # We should accept a Cookie header being passed as in normal headers and handle it appropriately. 
with handler() as rh: # Specified Cookie header should be used res = validate_and_send( rh, Request( f'http://127.0.0.1:{self.http_port}/headers', headers={'Cookie': 'test=test'})).read().decode() assert 'cookie: test=test' in res.lower() # Specified Cookie header should be removed on any redirect res = validate_and_send( rh, Request( f'http://127.0.0.1:{self.http_port}/308-to-headers', headers={'Cookie': 'test=test2'})).read().decode() assert 'cookie: test=test2' not in res.lower() # Specified Cookie header should override global cookiejar for that request # Whether cookies from the cookiejar is applied on the redirect is considered undefined for now cookiejar = YoutubeDLCookieJar() cookiejar.set_cookie(http.cookiejar.Cookie( version=0, name='test', value='ytdlp', port=None, port_specified=False, domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=False, comment=None, comment_url=None, rest={})) with handler(cookiejar=cookiejar) as rh: data = validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'cookie': 'test=test3'})).read() assert b'cookie: test=ytdlp' not in data.lower() assert b'cookie: test=test3' in data.lower() def test_redirect_loop(self, handler): with handler() as rh: with pytest.raises(HTTPError, match='redirect loop'): validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/redirect_loop')) def test_incompleteread(self, handler): with handler(timeout=2) as rh: with pytest.raises(IncompleteRead, match='13 bytes read, 234221 more expected'): validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/incompleteread')).read() def test_cookies(self, handler): cookiejar = YoutubeDLCookieJar() cookiejar.set_cookie(http.cookiejar.Cookie( 0, 'test', 'ytdlp', None, False, '127.0.0.1', True, False, '/headers', True, False, None, False, None, None, {})) with handler(cookiejar=cookiejar) as rh: data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read() assert b'cookie: test=ytdlp' in data.lower() # Per request with handler() as rh: data = validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': cookiejar})).read() assert b'cookie: test=ytdlp' in data.lower() def test_cookie_sync_only_cookiejar(self, handler): # Ensure that cookies are ONLY being handled by the cookiejar with handler() as rh: validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/get_cookie', extensions={'cookiejar': YoutubeDLCookieJar()})) data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={'cookiejar': YoutubeDLCookieJar()})).read() assert b'cookie: test=ytdlp' not in data.lower() def test_cookie_sync_delete_cookie(self, handler): # Ensure that cookies are ONLY being handled by the cookiejar cookiejar = YoutubeDLCookieJar() with handler(cookiejar=cookiejar) as rh: validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/get_cookie')) data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read() assert b'cookie: test=ytdlp' in data.lower() cookiejar.clear_session_cookies() data = validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).read() assert b'cookie: test=ytdlp' not in data.lower() def test_headers(self, handler): with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh: # Global Headers data = validate_and_send(rh, 
Request(f'http://127.0.0.1:{self.http_port}/headers')).read().lower() assert b'test1: test' in data # Per request headers, merged with global data = validate_and_send(rh, Request( f'http://127.0.0.1:{self.http_port}/headers', headers={'test2': 'changed', 'test3': 'test3'})).read().lower() assert b'test1: test' in data assert b'test2: changed' in data assert b'test2: test2' not in data assert b'test3: test3' in data def test_read_timeout(self, handler): with handler() as rh: # Default timeout is 20 seconds, so this should go through validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1')) with handler(timeout=0.1) as rh: with pytest.raises(TransportError): validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_5')) # Per request timeout, should override handler timeout validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/timeout_1', extensions={'timeout': 4})) def test_connect_timeout(self, handler): # nothing should be listening on this port connect_timeout_url = 'http://10.255.255.255' with handler(timeout=0.01) as rh, pytest.raises(TransportError): now = time.time() validate_and_send(rh, Request(connect_timeout_url)) assert time.time() - now < DEFAULT_TIMEOUT # Per request timeout, should override handler timeout request = Request(connect_timeout_url, extensions={'timeout': 0.01}) with handler() as rh, pytest.raises(TransportError): now = time.time() validate_and_send(rh, request) assert time.time() - now < DEFAULT_TIMEOUT def test_source_address(self, handler): source_address = f'127.0.0.{random.randint(5, 255)}' # on some systems these loopback addresses we need for testing may not be available # see: https://github.com/yt-dlp/yt-dlp/issues/8890 verify_address_availability(source_address) with handler(source_address=source_address) as rh: data = validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/source_address')).read().decode() assert source_address == data # Not supported by CurlCFFI @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi') def test_gzip_trailing_garbage(self, handler): with handler() as rh: data = validate_and_send(rh, Request(f'http://localhost:{self.http_port}/trailing_garbage')).read().decode() assert data == '<html><video src="/vid.mp4" /></html>' @pytest.mark.skip_handler('CurlCFFI', 'not applicable to curl-cffi') @pytest.mark.skipif(not brotli, reason='brotli support is not installed') def test_brotli(self, handler): with handler() as rh: res = validate_and_send( rh, Request( f'http://127.0.0.1:{self.http_port}/content-encoding', headers={'ytdl-encoding': 'br'})) assert res.headers.get('Content-Encoding') == 'br' assert res.read() == b'<html><video src="/vid.mp4" /></html>' def test_deflate(self, handler): with handler() as rh: res = validate_and_send( rh, Request( f'http://127.0.0.1:{self.http_port}/content-encoding', headers={'ytdl-encoding': 'deflate'})) assert res.headers.get('Content-Encoding') == 'deflate' assert res.read() == b'<html><video src="/vid.mp4" /></html>' def test_gzip(self, handler): with handler() as rh: res = validate_and_send( rh, Request( f'http://127.0.0.1:{self.http_port}/content-encoding', headers={'ytdl-encoding': 'gzip'})) assert res.headers.get('Content-Encoding') == 'gzip' assert res.read() == b'<html><video src="/vid.mp4" /></html>' def test_multiple_encodings(self, handler): with handler() as rh: for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'): res = validate_and_send( rh, Request( 
f'http://127.0.0.1:{self.http_port}/content-encoding', headers={'ytdl-encoding': pair})) assert res.headers.get('Content-Encoding') == pair assert res.read() == b'<html><video src="/vid.mp4" /></html>' @pytest.mark.skip_handler('CurlCFFI', 'not supported by curl-cffi') def test_unsupported_encoding(self, handler): with handler() as rh: res = validate_and_send( rh, Request( f'http://127.0.0.1:{self.http_port}/content-encoding', headers={'ytdl-encoding': 'unsupported', 'Accept-Encoding': '*'})) assert res.headers.get('Content-Encoding') == 'unsupported' assert res.read() == b'raw' def test_read(self, handler): with handler() as rh: res = validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/headers')) assert res.readable() assert res.read(1) == b'H' assert res.read(3) == b'ost' assert res.read().decode().endswith('\n\n') assert res.read() == b'' def test_request_disable_proxy(self, handler): for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']: # Given the handler is configured with a proxy with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh: # When a proxy is explicitly set to None for the request res = validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'http': None})) # Then no proxy should be used res.close() assert res.status == 200 @pytest.mark.skip_handlers_if( lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY') def test_noproxy(self, handler): for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['http']: # Given the handler is configured with a proxy with handler(proxies={'http': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh: for no_proxy in (f'127.0.0.1:{self.http_port}', '127.0.0.1', 'localhost'): # When request no proxy includes the request url host nop_response = validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/headers', proxies={'no': no_proxy})) # Then the proxy should not be used assert nop_response.status == 200 nop_response.close() @pytest.mark.skip_handlers_if( lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY') def test_allproxy(self, handler): # This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy. # 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures. 
with handler(proxies={'all': 'http://10.255.255.255'}, timeout=0.1) as rh: with pytest.raises(TransportError): validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/headers')).close() with handler(timeout=0.1) as rh: with pytest.raises(TransportError): validate_and_send( rh, Request( f'http://127.0.0.1:{self.http_port}/headers', proxies={'all': 'http://10.255.255.255'})).close() @pytest.mark.parametrize('handler', ['Urllib', 'Requests', 'CurlCFFI'], indirect=True) class TestClientCertificate: @classmethod def setup_class(cls): certfn = os.path.join(TEST_DIR, 'testcert.pem') cls.certdir = os.path.join(TEST_DIR, 'testdata', 'certificate') cacertfn = os.path.join(cls.certdir, 'ca.crt') cls.httpd = http.server.ThreadingHTTPServer(('127.0.0.1', 0), HTTPTestRequestHandler) sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) sslctx.verify_mode = ssl.CERT_REQUIRED sslctx.load_verify_locations(cafile=cacertfn) sslctx.load_cert_chain(certfn, None) cls.httpd.socket = sslctx.wrap_socket(cls.httpd.socket, server_side=True) cls.port = http_server_port(cls.httpd) cls.server_thread = threading.Thread(target=cls.httpd.serve_forever) cls.server_thread.daemon = True cls.server_thread.start() def _run_test(self, handler, **handler_kwargs): with handler( # Disable client-side validation of unacceptable self-signed testcert.pem # The test is of a check on the server side, so unaffected verify=False, **handler_kwargs, ) as rh: validate_and_send(rh, Request(f'https://127.0.0.1:{self.port}/video.html')).read().decode() def test_certificate_combined_nopass(self, handler): self._run_test(handler, client_cert={ 'client_certificate': os.path.join(self.certdir, 'clientwithkey.crt'), }) def test_certificate_nocombined_nopass(self, handler): self._run_test(handler, client_cert={ 'client_certificate': os.path.join(self.certdir, 'client.crt'), 'client_certificate_key': os.path.join(self.certdir, 'client.key'), }) def test_certificate_combined_pass(self, handler): self._run_test(handler, client_cert={ 'client_certificate': os.path.join(self.certdir, 'clientwithencryptedkey.crt'), 'client_certificate_password': 'foobar', }) def test_certificate_nocombined_pass(self, handler): self._run_test(handler, client_cert={ 'client_certificate': os.path.join(self.certdir, 'client.crt'), 'client_certificate_key': os.path.join(self.certdir, 'clientencrypted.key'), 'client_certificate_password': 'foobar', }) @pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True) class TestHTTPImpersonateRequestHandler(TestRequestHandlerBase): def test_supported_impersonate_targets(self, handler): with handler(headers=std_headers) as rh: # note: this assumes the impersonate request handler supports the impersonate extension for target in rh.supported_targets: res = validate_and_send(rh, Request( f'http://127.0.0.1:{self.http_port}/headers', extensions={'impersonate': target})) assert res.status == 200 assert std_headers['user-agent'].lower() not in res.read().decode().lower() def test_response_extensions(self, handler): with handler() as rh: for target in rh.supported_targets: request = Request( f'http://127.0.0.1:{self.http_port}/gen_200', extensions={'impersonate': target}) res = validate_and_send(rh, request) assert res.extensions['impersonate'] == rh._get_request_target(request) def test_http_error_response_extensions(self, handler): with handler() as rh: for target in rh.supported_targets: request = Request( f'http://127.0.0.1:{self.http_port}/gen_404', extensions={'impersonate': target}) try: validate_and_send(rh, request) except 
HTTPError as e: res = e.response assert res.extensions['impersonate'] == rh._get_request_target(request) class TestRequestHandlerMisc: """Misc generic tests for request handlers, not related to request or validation testing""" @pytest.mark.parametrize('handler,logger_name', [ ('Requests', 'urllib3'), ('Websockets', 'websockets.client'), ('Websockets', 'websockets.server'), ], indirect=['handler']) def test_remove_logging_handler(self, handler, logger_name): # Ensure any logging handlers, which may contain a YoutubeDL instance, # are removed when we close the request handler # See: https://github.com/yt-dlp/yt-dlp/issues/8922 logging_handlers = logging.getLogger(logger_name).handlers before_count = len(logging_handlers) rh = handler() assert len(logging_handlers) == before_count + 1 rh.close() assert len(logging_handlers) == before_count def test_wrap_request_errors(self): class TestRequestHandler(RequestHandler): def _validate(self, request): if request.headers.get('x-fail'): raise UnsupportedRequest('test error') def _send(self, request: Request): raise RequestError('test error') with TestRequestHandler(logger=FakeLogger()) as rh: with pytest.raises(UnsupportedRequest, match='test error') as exc_info: rh.validate(Request('http://example.com', headers={'x-fail': '1'})) assert exc_info.value.handler is rh with pytest.raises(RequestError, match='test error') as exc_info: rh.send(Request('http://example.com')) assert exc_info.value.handler is rh @pytest.mark.parametrize('handler', ['Urllib'], indirect=True) class TestUrllibRequestHandler(TestRequestHandlerBase): def test_file_urls(self, handler): # See https://github.com/ytdl-org/youtube-dl/issues/8227 tf = tempfile.NamedTemporaryFile(delete=False) tf.write(b'foobar') tf.close() req = Request(pathlib.Path(tf.name).as_uri()) with handler() as rh: with pytest.raises(UnsupportedRequest): rh.validate(req) # Test that urllib never loaded FileHandler with pytest.raises(TransportError): rh.send(req) with handler(enable_file_urls=True) as rh: res = validate_and_send(rh, req) assert res.read() == b'foobar' res.close() os.unlink(tf.name) def test_http_error_returns_content(self, handler): # urllib HTTPError will try close the underlying response if reference to the HTTPError object is lost def get_response(): with handler() as rh: # headers url try: validate_and_send(rh, Request(f'http://127.0.0.1:{self.http_port}/gen_404')) except HTTPError as e: return e.response assert get_response().read() == b'<html></html>' def test_verify_cert_error_text(self, handler): # Check the output of the error message with handler() as rh: with pytest.raises( CertificateVerifyError, match=r'\[SSL: CERTIFICATE_VERIFY_FAILED\] certificate verify failed: self.signed certificate', ): validate_and_send(rh, Request(f'https://127.0.0.1:{self.https_port}/headers')) @pytest.mark.parametrize('req,match,version_check', [ # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1256 # bpo-39603: Check implemented in 3.7.9+, 3.8.5+ ( Request('http://127.0.0.1', method='GET\n'), 'method can\'t contain control characters', lambda v: v < (3, 7, 9) or (3, 8, 0) <= v < (3, 8, 5), ), # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1265 # bpo-38576: Check implemented in 3.7.8+, 3.8.3+ ( Request('http://127.0.0. 
1', method='GET'), 'URL can\'t contain control characters', lambda v: v < (3, 7, 8) or (3, 8, 0) <= v < (3, 8, 3), ), # https://github.com/python/cpython/blob/987b712b4aeeece336eed24fcc87a950a756c3e2/Lib/http/client.py#L1288C31-L1288C50 (Request('http://127.0.0.1', headers={'foo\n': 'bar'}), 'Invalid header name', None), ]) def test_httplib_validation_errors(self, handler, req, match, version_check): if version_check and version_check(sys.version_info): pytest.skip(f'Python {sys.version} version does not have the required validation for this test.') with handler() as rh: with pytest.raises(RequestError, match=match) as exc_info: validate_and_send(rh, req) assert not isinstance(exc_info.value, TransportError) @pytest.mark.parametrize('handler', ['Requests'], indirect=True) class TestRequestsRequestHandler(TestRequestHandlerBase): @pytest.mark.parametrize('raised,expected', [ (lambda: requests.exceptions.ConnectTimeout(), TransportError), (lambda: requests.exceptions.ReadTimeout(), TransportError), (lambda: requests.exceptions.Timeout(), TransportError), (lambda: requests.exceptions.ConnectionError(), TransportError), (lambda: requests.exceptions.ProxyError(), ProxyError), (lambda: requests.exceptions.SSLError('12[CERTIFICATE_VERIFY_FAILED]34'), CertificateVerifyError), (lambda: requests.exceptions.SSLError(), SSLError), (lambda: requests.exceptions.InvalidURL(), RequestError), (lambda: requests.exceptions.InvalidHeader(), RequestError), # catch-all: https://github.com/psf/requests/blob/main/src/requests/adapters.py#L535 (lambda: urllib3.exceptions.HTTPError(), TransportError), (lambda: requests.exceptions.RequestException(), RequestError), # (lambda: requests.exceptions.TooManyRedirects(), HTTPError) - Needs a response object ]) def test_request_error_mapping(self, handler, monkeypatch, raised, expected): with handler() as rh: def mock_get_instance(*args, **kwargs): class MockSession: def request(self, *args, **kwargs): raise raised() return MockSession() monkeypatch.setattr(rh, '_get_instance', mock_get_instance) with pytest.raises(expected) as exc_info: rh.send(Request('http://fake')) assert exc_info.type is expected @pytest.mark.parametrize('raised,expected,match', [ (lambda: urllib3.exceptions.SSLError(), SSLError, None), (lambda: urllib3.exceptions.TimeoutError(), TransportError, None), (lambda: urllib3.exceptions.ReadTimeoutError(None, None, None), TransportError, None), (lambda: urllib3.exceptions.ProtocolError(), TransportError, None), (lambda: urllib3.exceptions.DecodeError(), TransportError, None), (lambda: urllib3.exceptions.HTTPError(), TransportError, None), # catch-all ( lambda: urllib3.exceptions.ProtocolError('error', http.client.IncompleteRead(partial=b'abc', expected=4)), IncompleteRead, '3 bytes read, 4 more expected', ), ( lambda: urllib3.exceptions.ProtocolError('error', urllib3.exceptions.IncompleteRead(partial=3, expected=5)), IncompleteRead, '3 bytes read, 5 more expected', ), ]) def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match): from requests.models import Response as RequestsResponse from urllib3.response import HTTPResponse as Urllib3Response from yt_dlp.networking._requests import RequestsResponseAdapter requests_res = RequestsResponse() requests_res.raw = Urllib3Response(body=b'', status=200) res = RequestsResponseAdapter(requests_res) def mock_read(*args, **kwargs): raise raised() monkeypatch.setattr(res.fp, 'read', mock_read) with pytest.raises(expected, match=match) as exc_info: res.read() assert exc_info.type is expected 
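    # Editor's note (an addition, not part of the original suite): the two parametrized
    # tests above share one pattern — construct the smallest object whose read()/request()
    # raises a library-native exception, then assert the adapter maps it to the matching
    # yt-dlp networking error. A hedged sketch of the same idea against a hypothetical
    # adapter (`MyResponseAdapter` and `fake_response` are illustrative names only):
    #
    #     def test_error_mapping_sketch(self, monkeypatch):
    #         adapter = MyResponseAdapter(fake_response)  # hypothetical adapter under test
    #         def boom(*args, **kwargs):
    #             raise OSError('boom')                   # stand-in for a library error
    #         monkeypatch.setattr(adapter.fp, 'read', boom)
    #         with pytest.raises(TransportError):         # expect the mapped yt-dlp error
    #             adapter.read()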
def test_close(self, handler, monkeypatch): rh = handler() session = rh._get_instance(cookiejar=rh.cookiejar) called = False original_close = session.close def mock_close(*args, **kwargs): nonlocal called called = True return original_close(*args, **kwargs) monkeypatch.setattr(session, 'close', mock_close) rh.close() assert called @pytest.mark.parametrize('handler', ['CurlCFFI'], indirect=True) class TestCurlCFFIRequestHandler(TestRequestHandlerBase): @pytest.mark.parametrize('params,extensions', [ ({'impersonate': ImpersonateTarget('chrome', '110')}, {}), ({'impersonate': ImpersonateTarget('chrome', '99')}, {'impersonate': ImpersonateTarget('chrome', '110')}), ]) def test_impersonate(self, handler, params, extensions): with handler(headers=std_headers, **params) as rh: res = validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions=extensions)).read().decode() assert 'sec-ch-ua: "Chromium";v="110"' in res # Check that the impersonated User-Agent is sent instead of ours assert 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36' in res def test_headers(self, handler): with handler(headers=std_headers) as rh: # Ensure curl-impersonate overrides our standard headers (usually added to all requests) res = validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/headers', extensions={ 'impersonate': ImpersonateTarget('safari')}, headers={'x-custom': 'test', 'sec-fetch-mode': 'custom'})).read().decode().lower() assert std_headers['user-agent'].lower() not in res assert std_headers['accept-language'].lower() not in res assert std_headers['sec-fetch-mode'].lower() not in res # other than UA, custom headers that differ from std_headers should be kept assert 'sec-fetch-mode: custom' in res assert 'x-custom: test' in res # but when not impersonating, don't remove std_headers res = validate_and_send( rh, Request(f'http://127.0.0.1:{self.http_port}/headers', headers={'x-custom': 'test'})).read().decode().lower() # std_headers should be present for k, v in std_headers.items(): assert f'{k}: {v}'.lower() in res @pytest.mark.parametrize('raised,expected,match', [ (lambda: curl_cffi.requests.errors.RequestsError( '', code=curl_cffi.const.CurlECode.PARTIAL_FILE), IncompleteRead, None), (lambda: curl_cffi.requests.errors.RequestsError( '', code=curl_cffi.const.CurlECode.OPERATION_TIMEDOUT), TransportError, None), (lambda: curl_cffi.requests.errors.RequestsError( '', code=curl_cffi.const.CurlECode.RECV_ERROR), TransportError, None), ]) def test_response_error_mapping(self, handler, monkeypatch, raised, expected, match): import curl_cffi.requests from yt_dlp.networking._curlcffi import CurlCFFIResponseAdapter curl_res = curl_cffi.requests.Response() res = CurlCFFIResponseAdapter(curl_res) def mock_read(*args, **kwargs): try: raise raised() except Exception as e: e.response = curl_res raise monkeypatch.setattr(res.fp, 'read', mock_read) with pytest.raises(expected, match=match) as exc_info: res.read() assert exc_info.type is expected @pytest.mark.parametrize('raised,expected,match', [ (lambda: curl_cffi.requests.errors.RequestsError( '', code=curl_cffi.const.CurlECode.OPERATION_TIMEDOUT), TransportError, None), (lambda: curl_cffi.requests.errors.RequestsError( '', code=curl_cffi.const.CurlECode.PEER_FAILED_VERIFICATION), CertificateVerifyError, None), (lambda: curl_cffi.requests.errors.RequestsError( '', code=curl_cffi.const.CurlECode.SSL_CONNECT_ERROR), SSLError, None), (lambda: curl_cffi.requests.errors.RequestsError( '', 
code=curl_cffi.const.CurlECode.TOO_MANY_REDIRECTS), HTTPError, None), (lambda: curl_cffi.requests.errors.RequestsError( '', code=curl_cffi.const.CurlECode.PROXY), ProxyError, None), ]) def test_request_error_mapping(self, handler, monkeypatch, raised, expected, match): import curl_cffi.requests curl_res = curl_cffi.requests.Response() curl_res.status_code = 301 with handler() as rh: original_get_instance = rh._get_instance def mock_get_instance(*args, **kwargs): instance = original_get_instance(*args, **kwargs) def request(*_, **__): try: raise raised() except Exception as e: e.response = curl_res raise monkeypatch.setattr(instance, 'request', request) return instance monkeypatch.setattr(rh, '_get_instance', mock_get_instance) with pytest.raises(expected) as exc_info: rh.send(Request('http://fake')) assert exc_info.type is expected def test_response_reader(self, handler): class FakeResponse: def __init__(self, raise_error=False): self.raise_error = raise_error self.closed = False def iter_content(self): yield b'foo' yield b'bar' yield b'z' if self.raise_error: raise Exception('test') def close(self): self.closed = True from yt_dlp.networking._curlcffi import CurlCFFIResponseReader res = CurlCFFIResponseReader(FakeResponse()) assert res.readable assert res.bytes_read == 0 assert res.read(1) == b'f' assert res.bytes_read == 3 assert res._buffer == b'oo' assert res.read(2) == b'oo' assert res.bytes_read == 3 assert res._buffer == b'' assert res.read(2) == b'ba' assert res.bytes_read == 6 assert res._buffer == b'r' assert res.read(3) == b'rz' assert res.bytes_read == 7 assert res._buffer == b'' assert res.closed assert res._response.closed # should handle no size param res2 = CurlCFFIResponseReader(FakeResponse()) assert res2.read() == b'foobarz' assert res2.bytes_read == 7 assert res2._buffer == b'' assert res2.closed # should close on an exception res3 = CurlCFFIResponseReader(FakeResponse(raise_error=True)) with pytest.raises(Exception, match='test'): res3.read() assert res3._buffer == b'' assert res3.bytes_read == 7 assert res3.closed # buffer should be cleared on close res4 = CurlCFFIResponseReader(FakeResponse()) res4.read(2) assert res4._buffer == b'o' res4.close() assert res4.closed assert res4._buffer == b'' def run_validation(handler, error, req, **handler_kwargs): with handler(**handler_kwargs) as rh: if error: with pytest.raises(error): rh.validate(req) else: rh.validate(req) class TestRequestHandlerValidation: class ValidationRH(RequestHandler): def _send(self, request): raise RequestError('test') class NoCheckRH(ValidationRH): _SUPPORTED_FEATURES = None _SUPPORTED_PROXY_SCHEMES = None _SUPPORTED_URL_SCHEMES = None def _check_extensions(self, extensions): extensions.clear() class HTTPSupportedRH(ValidationRH): _SUPPORTED_URL_SCHEMES = ('http',) URL_SCHEME_TESTS = [ # scheme, expected to fail, handler kwargs ('Urllib', [ ('http', False, {}), ('https', False, {}), ('data', False, {}), ('ftp', False, {}), ('file', UnsupportedRequest, {}), ('file', False, {'enable_file_urls': True}), ]), ('Requests', [ ('http', False, {}), ('https', False, {}), ]), ('Websockets', [ ('ws', False, {}), ('wss', False, {}), ]), ('CurlCFFI', [ ('http', False, {}), ('https', False, {}), ]), (NoCheckRH, [('http', False, {})]), (ValidationRH, [('http', UnsupportedRequest, {})]), ] PROXY_SCHEME_TESTS = [ # proxy scheme, expected to fail ('Urllib', 'http', [ ('http', False), ('https', UnsupportedRequest), ('socks4', False), ('socks4a', False), ('socks5', False), ('socks5h', False), ('socks', 
UnsupportedRequest), ]), ('Requests', 'http', [ ('http', False), ('https', False), ('socks4', False), ('socks4a', False), ('socks5', False), ('socks5h', False), ]), ('CurlCFFI', 'http', [ ('http', False), ('https', False), ('socks4', False), ('socks4a', False), ('socks5', False), ('socks5h', False), ]), ('Websockets', 'ws', [ ('http', UnsupportedRequest), ('https', UnsupportedRequest), ('socks4', False), ('socks4a', False), ('socks5', False), ('socks5h', False), ]), (NoCheckRH, 'http', [('http', False)]), (HTTPSupportedRH, 'http', [('http', UnsupportedRequest)]), ] PROXY_KEY_TESTS = [ # proxy key, proxy scheme, expected to fail ('Urllib', 'http', [ ('all', 'http', False), ('unrelated', 'http', False), ]), ('Requests', 'http', [ ('all', 'http', False), ('unrelated', 'http', False), ]), ('CurlCFFI', 'http', [ ('all', 'http', False), ('unrelated', 'http', False), ]), ('Websockets', 'ws', [ ('all', 'socks5', False), ('unrelated', 'socks5', False), ]), (NoCheckRH, 'http', [('all', 'http', False)]), (HTTPSupportedRH, 'http', [('all', 'http', UnsupportedRequest)]), (HTTPSupportedRH, 'http', [('no', 'http', UnsupportedRequest)]), ] EXTENSION_TESTS = [ ('Urllib', 'http', [ ({'cookiejar': 'notacookiejar'}, AssertionError), ({'cookiejar': YoutubeDLCookieJar()}, False), ({'cookiejar': CookieJar()}, AssertionError), ({'timeout': 1}, False), ({'timeout': 'notatimeout'}, AssertionError), ({'unsupported': 'value'}, UnsupportedRequest), ({'legacy_ssl': False}, False), ({'legacy_ssl': True}, False), ({'legacy_ssl': 'notabool'}, AssertionError), ]), ('Requests', 'http', [ ({'cookiejar': 'notacookiejar'}, AssertionError), ({'cookiejar': YoutubeDLCookieJar()}, False), ({'timeout': 1}, False), ({'timeout': 'notatimeout'}, AssertionError), ({'unsupported': 'value'}, UnsupportedRequest), ({'legacy_ssl': False}, False), ({'legacy_ssl': True}, False), ({'legacy_ssl': 'notabool'}, AssertionError), ]), ('CurlCFFI', 'http', [ ({'cookiejar': 'notacookiejar'}, AssertionError), ({'cookiejar': YoutubeDLCookieJar()}, False), ({'timeout': 1}, False), ({'timeout': 'notatimeout'}, AssertionError), ({'unsupported': 'value'}, UnsupportedRequest), ({'impersonate': ImpersonateTarget('badtarget', None, None, None)}, UnsupportedRequest), ({'impersonate': 123}, AssertionError), ({'impersonate': ImpersonateTarget('chrome', None, None, None)}, False), ({'impersonate': ImpersonateTarget(None, None, None, None)}, False), ({'impersonate': ImpersonateTarget()}, False), ({'impersonate': 'chrome'}, AssertionError), ({'legacy_ssl': False}, False), ({'legacy_ssl': True}, False), ({'legacy_ssl': 'notabool'}, AssertionError), ]), (NoCheckRH, 'http', [ ({'cookiejar': 'notacookiejar'}, False), ({'somerandom': 'test'}, False), # but any extension is allowed through ]), ('Websockets', 'ws', [ ({'cookiejar': YoutubeDLCookieJar()}, False), ({'timeout': 2}, False), ({'legacy_ssl': False}, False), ({'legacy_ssl': True}, False), ({'legacy_ssl': 'notabool'}, AssertionError), ]), ] @pytest.mark.parametrize('handler,fail,scheme', [ ('Urllib', False, 'http'), ('Requests', False, 'http'), ('CurlCFFI', False, 'http'), ('Websockets', False, 'ws'), ], indirect=['handler']) def test_no_proxy(self, handler, fail, scheme): run_validation(handler, fail, Request(f'{scheme}://', proxies={'no': '127.0.0.1,github.com'})) run_validation(handler, fail, Request(f'{scheme}://'), proxies={'no': '127.0.0.1,github.com'}) @pytest.mark.parametrize('handler,scheme', [ ('Urllib', 
'http'), (HTTPSupportedRH, 'http'), ('Requests', 'http'), ('CurlCFFI', 'http'), ('Websockets', 'ws'), ], indirect=['handler']) def test_empty_proxy(self, handler, scheme): run_validation(handler, False, Request(f'{scheme}://', proxies={scheme: None})) run_validation(handler, False, Request(f'{scheme}://'), proxies={scheme: None}) @pytest.mark.parametrize('proxy_url', ['//example.com', 'example.com', '127.0.0.1', '/a/b/c']) @pytest.mark.parametrize('handler,scheme', [ ('Urllib', 'http'), (HTTPSupportedRH, 'http'), ('Requests', 'http'), ('CurlCFFI', 'http'), ('Websockets', 'ws'), ], indirect=['handler']) def test_invalid_proxy_url(self, handler, scheme, proxy_url): run_validation(handler, UnsupportedRequest, Request(f'{scheme}://', proxies={scheme: proxy_url})) @pytest.mark.parametrize('handler,scheme,fail,handler_kwargs', [ (handler_tests[0], scheme, fail, handler_kwargs) for handler_tests in URL_SCHEME_TESTS for scheme, fail, handler_kwargs in handler_tests[1] ], indirect=['handler']) def test_url_scheme(self, handler, scheme, fail, handler_kwargs): run_validation(handler, fail, Request(f'{scheme}://'), **(handler_kwargs or {})) @pytest.mark.parametrize('handler,scheme,proxy_key,proxy_scheme,fail', [ (handler_tests[0], handler_tests[1], proxy_key, proxy_scheme, fail) for handler_tests in PROXY_KEY_TESTS for proxy_key, proxy_scheme, fail in handler_tests[2] ], indirect=['handler']) def test_proxy_key(self, handler, scheme, proxy_key, proxy_scheme, fail): run_validation(handler, fail, Request(f'{scheme}://', proxies={proxy_key: f'{proxy_scheme}://example.com'})) run_validation(handler, fail, Request(f'{scheme}://'), proxies={proxy_key: f'{proxy_scheme}://example.com'}) @pytest.mark.parametrize('handler,req_scheme,scheme,fail', [ (handler_tests[0], handler_tests[1], scheme, fail) for handler_tests in PROXY_SCHEME_TESTS for scheme, fail in handler_tests[2] ], indirect=['handler']) def test_proxy_scheme(self, handler, req_scheme, scheme, fail): run_validation(handler, fail, Request(f'{req_scheme}://', proxies={req_scheme: f'{scheme}://example.com'})) run_validation(handler, fail, Request(f'{req_scheme}://'), proxies={req_scheme: f'{scheme}://example.com'}) @pytest.mark.parametrize('handler,scheme,extensions,fail', [ (handler_tests[0], handler_tests[1], extensions, fail) for handler_tests in EXTENSION_TESTS for extensions, fail in handler_tests[2] ], indirect=['handler']) def test_extension(self, handler, scheme, extensions, fail): run_validation( handler, fail, Request(f'{scheme}://', extensions=extensions)) def test_invalid_request_type(self): rh = self.ValidationRH(logger=FakeLogger()) for method in (rh.validate, rh.send): with pytest.raises(TypeError, match='Expected an instance of Request'): method('not a request') class FakeResponse(Response): def __init__(self, request): # XXX: we could make request part of standard response interface self.request = request super().__init__(fp=io.BytesIO(b''), headers={}, url=request.url) class FakeRH(RequestHandler): def __init__(self, *args, **params): self.params = params super().__init__(*args, **params) def _validate(self, request): return def _send(self, request: Request): if request.url.startswith('ssl://'): raise SSLError(request.url[len('ssl://'):]) return FakeResponse(request) class FakeRHYDL(FakeYDL): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self._request_director = self.build_request_director([FakeRH]) class AllUnsupportedRHYDL(FakeYDL): def __init__(self, *args, **kwargs): class UnsupportedRH(RequestHandler): def 
_send(self, request: Request): pass _SUPPORTED_FEATURES = () _SUPPORTED_PROXY_SCHEMES = () _SUPPORTED_URL_SCHEMES = () super().__init__(*args, **kwargs) self._request_director = self.build_request_director([UnsupportedRH]) class TestRequestDirector: def test_handler_operations(self): director = RequestDirector(logger=FakeLogger()) handler = FakeRH(logger=FakeLogger()) director.add_handler(handler) assert director.handlers.get(FakeRH.RH_KEY) is handler # Handler should overwrite handler2 = FakeRH(logger=FakeLogger()) director.add_handler(handler2) assert director.handlers.get(FakeRH.RH_KEY) is not handler assert director.handlers.get(FakeRH.RH_KEY) is handler2 assert len(director.handlers) == 1 class AnotherFakeRH(FakeRH): pass director.add_handler(AnotherFakeRH(logger=FakeLogger())) assert len(director.handlers) == 2 assert director.handlers.get(AnotherFakeRH.RH_KEY).RH_KEY == AnotherFakeRH.RH_KEY director.handlers.pop(FakeRH.RH_KEY, None) assert director.handlers.get(FakeRH.RH_KEY) is None assert len(director.handlers) == 1 # RequestErrors should passthrough with pytest.raises(SSLError): director.send(Request('ssl://something')) def test_send(self): director = RequestDirector(logger=FakeLogger()) with pytest.raises(RequestError): director.send(Request('any://')) director.add_handler(FakeRH(logger=FakeLogger())) assert isinstance(director.send(Request('http://')), FakeResponse) def test_unsupported_handlers(self): class SupportedRH(RequestHandler): _SUPPORTED_URL_SCHEMES = ['http'] def _send(self, request: Request): return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url) director = RequestDirector(logger=FakeLogger()) director.add_handler(SupportedRH(logger=FakeLogger())) director.add_handler(FakeRH(logger=FakeLogger())) # First should take preference assert director.send(Request('http://')).read() == b'supported' assert director.send(Request('any://')).read() == b'' director.handlers.pop(FakeRH.RH_KEY) with pytest.raises(NoSupportingHandlers): director.send(Request('any://')) def test_unexpected_error(self): director = RequestDirector(logger=FakeLogger()) class UnexpectedRH(FakeRH): def _send(self, request: Request): raise TypeError('something') director.add_handler(UnexpectedRH(logger=FakeLogger)) with pytest.raises(NoSupportingHandlers, match=r'1 unexpected error'): director.send(Request('any://')) director.handlers.clear() assert len(director.handlers) == 0 # Should not be fatal director.add_handler(FakeRH(logger=FakeLogger())) director.add_handler(UnexpectedRH(logger=FakeLogger)) assert director.send(Request('any://')) def test_preference(self): director = RequestDirector(logger=FakeLogger()) director.add_handler(FakeRH(logger=FakeLogger())) class SomeRH(RequestHandler): _SUPPORTED_URL_SCHEMES = ['http'] def _send(self, request: Request): return Response(fp=io.BytesIO(b'supported'), headers={}, url=request.url) def some_preference(rh, request): return (0 if not isinstance(rh, SomeRH) else 100 if 'prefer' in request.headers else -1) director.add_handler(SomeRH(logger=FakeLogger())) director.preferences.add(some_preference) assert director.send(Request('http://')).read() == b'' assert director.send(Request('http://', headers={'prefer': '1'})).read() == b'supported' def test_close(self, monkeypatch): director = RequestDirector(logger=FakeLogger()) director.add_handler(FakeRH(logger=FakeLogger())) called = False def mock_close(*args, **kwargs): nonlocal called called = True monkeypatch.setattr(director.handlers[FakeRH.RH_KEY], 'close', mock_close) director.close() assert 
called # XXX: do we want to move this to test_YoutubeDL.py? class TestYoutubeDLNetworking: @staticmethod def build_handler(ydl, handler: RequestHandler = FakeRH): return ydl.build_request_director([handler]).handlers.get(handler.RH_KEY) def test_compat_opener(self): with FakeYDL() as ydl: with warnings.catch_warnings(): warnings.simplefilter('ignore', category=DeprecationWarning) assert isinstance(ydl._opener, urllib.request.OpenerDirector) @pytest.mark.parametrize('proxy,expected', [ ('http://127.0.0.1:8080', {'all': 'http://127.0.0.1:8080'}), ('', {'all': '__noproxy__'}), (None, {'http': 'http://127.0.0.1:8081', 'https': 'http://127.0.0.1:8081'}), # env, set https ]) def test_proxy(self, proxy, expected, monkeypatch): monkeypatch.setenv('HTTP_PROXY', 'http://127.0.0.1:8081') with FakeYDL({'proxy': proxy}) as ydl: assert ydl.proxies == expected def test_compat_request(self): with FakeRHYDL() as ydl: assert ydl.urlopen('test://') urllib_req = urllib.request.Request('http://foo.bar', data=b'test', method='PUT', headers={'X-Test': '1'}) urllib_req.add_unredirected_header('Cookie', 'bob=bob') urllib_req.timeout = 2 with warnings.catch_warnings(): warnings.simplefilter('ignore', category=DeprecationWarning) req = ydl.urlopen(urllib_req).request assert req.url == urllib_req.get_full_url() assert req.data == urllib_req.data assert req.method == urllib_req.get_method() assert 'X-Test' in req.headers assert 'Cookie' in req.headers assert req.extensions.get('timeout') == 2 with pytest.raises(AssertionError): ydl.urlopen(None) def test_extract_basic_auth(self): with FakeRHYDL() as ydl: res = ydl.urlopen(Request('http://user:pass@foo.bar')) assert res.request.headers['Authorization'] == 'Basic dXNlcjpwYXNz' def test_sanitize_url(self): with FakeRHYDL() as ydl: res = ydl.urlopen(Request('httpss://foo.bar')) assert res.request.url == 'https://foo.bar' def test_file_urls_error(self): # use urllib handler with FakeYDL() as ydl: with pytest.raises(RequestError, match=r'file:// URLs are disabled by default'): ydl.urlopen('file://') @pytest.mark.parametrize('scheme', (['ws', 'wss'])) def test_websocket_unavailable_error(self, scheme): with AllUnsupportedRHYDL() as ydl: with pytest.raises(RequestError, match=r'This request requires WebSocket support'): ydl.urlopen(f'{scheme}://') def test_legacy_server_connect_error(self): with FakeRHYDL() as ydl: for error in ('UNSAFE_LEGACY_RENEGOTIATION_DISABLED', 'SSLV3_ALERT_HANDSHAKE_FAILURE'): with pytest.raises(RequestError, match=r'Try using --legacy-server-connect'): ydl.urlopen(f'ssl://{error}') with pytest.raises(SSLError, match='testerror'): ydl.urlopen('ssl://testerror') def test_unsupported_impersonate_target(self): class FakeImpersonationRHYDL(FakeYDL): def __init__(self, *args, **kwargs): class HTTPRH(RequestHandler): def _send(self, request: Request): pass _SUPPORTED_URL_SCHEMES = ('http',) _SUPPORTED_PROXY_SCHEMES = None super().__init__(*args, **kwargs) self._request_director = self.build_request_director([HTTPRH]) with FakeImpersonationRHYDL() as ydl: with pytest.raises( RequestError, match=r'Impersonate target "test" is not available', ): ydl.urlopen(Request('http://', extensions={'impersonate': ImpersonateTarget('test', None, None, None)})) def test_unsupported_impersonate_extension(self): class FakeHTTPRHYDL(FakeYDL): def __init__(self, *args, **kwargs): class IRH(ImpersonateRequestHandler): def _send(self, request: Request): pass _SUPPORTED_URL_SCHEMES = ('http',) _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc'): 'test'} 
_SUPPORTED_PROXY_SCHEMES = None super().__init__(*args, **kwargs) self._request_director = self.build_request_director([IRH]) with FakeHTTPRHYDL() as ydl: with pytest.raises( RequestError, match=r'Impersonate target "test" is not available', ): ydl.urlopen(Request('http://', extensions={'impersonate': ImpersonateTarget('test', None, None, None)})) def test_raise_impersonate_error(self): with pytest.raises( YoutubeDLError, match=r'Impersonate target "test" is not available', ): FakeYDL({'impersonate': ImpersonateTarget('test', None, None, None)}) def test_pass_impersonate_param(self, monkeypatch): class IRH(ImpersonateRequestHandler): def _send(self, request: Request): pass _SUPPORTED_URL_SCHEMES = ('http',) _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget('abc'): 'test'} # Bypass the check on initialize brh = FakeYDL.build_request_director monkeypatch.setattr(FakeYDL, 'build_request_director', lambda cls, handlers, preferences=None: brh(cls, handlers=[IRH])) with FakeYDL({ 'impersonate': ImpersonateTarget('abc', None, None, None), }) as ydl: rh = self.build_handler(ydl, IRH) assert rh.impersonate == ImpersonateTarget('abc', None, None, None) def test_get_impersonate_targets(self): handlers = [] for target_client in ('abc', 'xyz', 'asd'): class TestRH(ImpersonateRequestHandler): def _send(self, request: Request): pass _SUPPORTED_URL_SCHEMES = ('http',) _SUPPORTED_IMPERSONATE_TARGET_MAP = {ImpersonateTarget(target_client): 'test'} RH_KEY = target_client RH_NAME = target_client handlers.append(TestRH) with FakeYDL() as ydl: ydl._request_director = ydl.build_request_director(handlers) assert set(ydl._get_available_impersonate_targets()) == { (ImpersonateTarget('xyz'), 'xyz'), (ImpersonateTarget('abc'), 'abc'), (ImpersonateTarget('asd'), 'asd'), } assert ydl._impersonate_target_available(ImpersonateTarget('abc')) assert ydl._impersonate_target_available(ImpersonateTarget()) assert not ydl._impersonate_target_available(ImpersonateTarget('zxy')) @pytest.mark.parametrize('proxy_key,proxy_url,expected', [ ('http', '__noproxy__', None), ('no', '127.0.0.1,foo.bar', '127.0.0.1,foo.bar'), ('https', 'example.com', 'http://example.com'), ('https', '//example.com', 'http://example.com'), ('https', 'socks5://example.com', 'socks5h://example.com'), ('http', 'socks://example.com', 'socks4://example.com'), ('http', 'socks4://example.com', 'socks4://example.com'), ('unrelated', '/bad/proxy', '/bad/proxy'), # clean_proxies should ignore bad proxies ]) def test_clean_proxy(self, proxy_key, proxy_url, expected, monkeypatch): # proxies should be cleaned in urlopen() with FakeRHYDL() as ydl: req = ydl.urlopen(Request('test://', proxies={proxy_key: proxy_url})).request assert req.proxies[proxy_key] == expected # and should also be cleaned when building the handler monkeypatch.setenv(f'{proxy_key.upper()}_PROXY', proxy_url) with FakeYDL() as ydl: rh = self.build_handler(ydl) assert rh.proxies[proxy_key] == expected def test_clean_proxy_header(self): with FakeRHYDL() as ydl: req = ydl.urlopen(Request('test://', headers={'ytdl-request-proxy': '//foo.bar'})).request assert 'ytdl-request-proxy' not in req.headers assert req.proxies == {'all': 'http://foo.bar'} with FakeYDL({'http_headers': {'ytdl-request-proxy': '//foo.bar'}}) as ydl: rh = self.build_handler(ydl) assert 'ytdl-request-proxy' not in rh.headers assert rh.proxies == {'all': 'http://foo.bar'} def test_clean_header(self): with FakeRHYDL() as ydl: res = ydl.urlopen(Request('test://', headers={'Youtubedl-no-compression': True})) assert 
'Youtubedl-no-compression' not in res.request.headers assert res.request.headers.get('Accept-Encoding') == 'identity' with FakeYDL({'http_headers': {'Youtubedl-no-compression': True}}) as ydl: rh = self.build_handler(ydl) assert 'Youtubedl-no-compression' not in rh.headers assert rh.headers.get('Accept-Encoding') == 'identity' with FakeYDL({'http_headers': {'Ytdl-socks-proxy': 'socks://localhost:1080'}}) as ydl: rh = self.build_handler(ydl) assert 'Ytdl-socks-proxy' not in rh.headers def test_build_handler_params(self): with FakeYDL({ 'http_headers': {'test': 'testtest'}, 'socket_timeout': 2, 'proxy': 'http://127.0.0.1:8080', 'source_address': '127.0.0.45', 'debug_printtraffic': True, 'compat_opts': ['no-certifi'], 'nocheckcertificate': True, 'legacyserverconnect': True, }) as ydl: rh = self.build_handler(ydl) assert rh.headers.get('test') == 'testtest' assert 'Accept' in rh.headers # ensure std_headers are still there assert rh.timeout == 2 assert rh.proxies.get('all') == 'http://127.0.0.1:8080' assert rh.source_address == '127.0.0.45' assert rh.verbose is True assert rh.prefer_system_certs is True assert rh.verify is False assert rh.legacy_ssl_support is True @pytest.mark.parametrize('ydl_params', [ {'client_certificate': 'fakecert.crt'}, {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key'}, {'client_certificate': 'fakecert.crt', 'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'}, {'client_certificate_key': 'fakekey.key', 'client_certificate_password': 'foobar'}, ]) def test_client_certificate(self, ydl_params): with FakeYDL(ydl_params) as ydl: rh = self.build_handler(ydl) assert rh._client_cert == ydl_params # XXX: Too bound to implementation def test_urllib_file_urls(self): with FakeYDL({'enable_file_urls': False}) as ydl: rh = self.build_handler(ydl, UrllibRH) assert rh.enable_file_urls is False with FakeYDL({'enable_file_urls': True}) as ydl: rh = self.build_handler(ydl, UrllibRH) assert rh.enable_file_urls is True def test_compat_opt_prefer_urllib(self): # This assumes urllib only has a preference when this compat opt is given with FakeYDL({'compat_opts': ['prefer-legacy-http-handler']}) as ydl: director = ydl.build_request_director([UrllibRH]) assert len(director.preferences) == 1 assert director.preferences.pop()(UrllibRH, None) class TestRequest: def test_query(self): req = Request('http://example.com?q=something', query={'v': 'xyz'}) assert req.url == 'http://example.com?q=something&v=xyz' req.update(query={'v': '123'}) assert req.url == 'http://example.com?q=something&v=123' req.update(url='http://example.com', query={'v': 'xyz'}) assert req.url == 'http://example.com?v=xyz' def test_method(self): req = Request('http://example.com') assert req.method == 'GET' req.data = b'test' assert req.method == 'POST' req.data = None assert req.method == 'GET' req.data = b'test2' req.method = 'PUT' assert req.method == 'PUT' req.data = None assert req.method == 'PUT' with pytest.raises(TypeError): req.method = 1 def test_request_helpers(self): assert HEADRequest('http://example.com').method == 'HEAD' assert PUTRequest('http://example.com').method == 'PUT' def test_headers(self): req = Request('http://example.com', headers={'tesT': 'test'}) assert req.headers == HTTPHeaderDict({'test': 'test'}) req.update(headers={'teSt2': 'test2'}) assert req.headers == HTTPHeaderDict({'test': 'test', 'test2': 'test2'}) req.headers = new_headers = HTTPHeaderDict({'test': 'test'}) assert req.headers == HTTPHeaderDict({'test': 'test'}) assert 
req.headers is new_headers # test converts dict to case insensitive dict req.headers = new_headers = {'test2': 'test2'} assert isinstance(req.headers, HTTPHeaderDict) assert req.headers is not new_headers with pytest.raises(TypeError): req.headers = None def test_data_type(self): req = Request('http://example.com') assert req.data is None # test bytes is allowed req.data = b'test' assert req.data == b'test' # test iterable of bytes is allowed i = [b'test', b'test2'] req.data = i assert req.data == i # test file-like object is allowed f = io.BytesIO(b'test') req.data = f assert req.data == f # common mistake: test str not allowed with pytest.raises(TypeError): req.data = 'test' assert req.data != 'test' # common mistake: test dict is not allowed with pytest.raises(TypeError): req.data = {'test': 'test'} assert req.data != {'test': 'test'} def test_content_length_header(self): req = Request('http://example.com', headers={'Content-Length': '0'}, data=b'') assert req.headers.get('Content-Length') == '0' req.data = b'test' assert 'Content-Length' not in req.headers req = Request('http://example.com', headers={'Content-Length': '10'}) assert 'Content-Length' not in req.headers def test_content_type_header(self): req = Request('http://example.com', headers={'Content-Type': 'test'}, data=b'test') assert req.headers.get('Content-Type') == 'test' req.data = b'test2' assert req.headers.get('Content-Type') == 'test' req.data = None assert 'Content-Type' not in req.headers req.data = b'test3' assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded' def test_update_req(self): req = Request('http://example.com') assert req.data is None assert req.method == 'GET' assert 'Content-Type' not in req.headers # Test that zero-byte payloads will be sent req.update(data=b'') assert req.data == b'' assert req.method == 'POST' assert req.headers.get('Content-Type') == 'application/x-www-form-urlencoded' def test_proxies(self): req = Request(url='http://example.com', proxies={'http': 'http://127.0.0.1:8080'}) assert req.proxies == {'http': 'http://127.0.0.1:8080'} def test_extensions(self): req = Request(url='http://example.com', extensions={'timeout': 2}) assert req.extensions == {'timeout': 2} def test_copy(self): req = Request( url='http://example.com', extensions={'cookiejar': CookieJar()}, headers={'Accept-Encoding': 'br'}, proxies={'http': 'http://127.0.0.1'}, data=[b'123'], ) req_copy = req.copy() assert req_copy is not req assert req_copy.url == req.url assert req_copy.headers == req.headers assert req_copy.headers is not req.headers assert req_copy.proxies == req.proxies assert req_copy.proxies is not req.proxies # Data is not able to be copied assert req_copy.data == req.data assert req_copy.data is req.data # Shallow copy extensions assert req_copy.extensions is not req.extensions assert req_copy.extensions['cookiejar'] == req.extensions['cookiejar'] # Subclasses are copied by default class AnotherRequest(Request): pass req = AnotherRequest(url='http://127.0.0.1') assert isinstance(req.copy(), AnotherRequest) def test_url(self): req = Request(url='https://фtest.example.com/ some spaceв?ä=c') assert req.url == 'https://xn--test-z6d.example.com/%20some%20space%D0%B2?%C3%A4=c' assert Request(url='//example.com').url == 'http://example.com' with pytest.raises(TypeError): Request(url='https://').url = None class TestResponse: @pytest.mark.parametrize('reason,status,expected', [ ('custom', 200, 'custom'), (None, 404, 'Not Found'), # fallback status ('', 403, 'Forbidden'), (None, 999, None), 
]) def test_reason(self, reason, status, expected): res = Response(io.BytesIO(b''), url='test://', headers={}, status=status, reason=reason) assert res.reason == expected def test_headers(self): headers = Message() headers.add_header('Test', 'test') headers.add_header('Test', 'test2') headers.add_header('content-encoding', 'br') res = Response(io.BytesIO(b''), headers=headers, url='test://') assert res.headers.get_all('test') == ['test', 'test2'] assert 'Content-Encoding' in res.headers def test_get_header(self): headers = Message() headers.add_header('Set-Cookie', 'cookie1') headers.add_header('Set-cookie', 'cookie2') headers.add_header('Test', 'test') headers.add_header('Test', 'test2') res = Response(io.BytesIO(b''), headers=headers, url='test://') assert res.get_header('test') == 'test, test2' assert res.get_header('set-Cookie') == 'cookie1' assert res.get_header('notexist', 'default') == 'default' def test_compat(self): res = Response(io.BytesIO(b''), url='test://', status=404, headers={'test': 'test'}) with warnings.catch_warnings(): warnings.simplefilter('ignore', category=DeprecationWarning) assert res.code == res.getcode() == res.status assert res.geturl() == res.url assert res.info() is res.headers assert res.getheader('test') == res.get_header('test') class TestImpersonateTarget: @pytest.mark.parametrize('target_str,expected', [ ('abc', ImpersonateTarget('abc', None, None, None)), ('abc-120_esr', ImpersonateTarget('abc', '120_esr', None, None)), ('abc-120:xyz', ImpersonateTarget('abc', '120', 'xyz', None)), ('abc-120:xyz-5.6', ImpersonateTarget('abc', '120', 'xyz', '5.6')), ('abc:xyz', ImpersonateTarget('abc', None, 'xyz', None)), ('abc:', ImpersonateTarget('abc', None, None, None)), ('abc-120:', ImpersonateTarget('abc', '120', None, None)), (':xyz', ImpersonateTarget(None, None, 'xyz', None)), (':xyz-6.5', ImpersonateTarget(None, None, 'xyz', '6.5')), (':', ImpersonateTarget(None, None, None, None)), ('', ImpersonateTarget(None, None, None, None)), ]) def test_target_from_str(self, target_str, expected): assert ImpersonateTarget.from_str(target_str) == expected @pytest.mark.parametrize('target_str', [ '-120', ':-12.0', '-12:-12', '-:-', '::', 'a-c-d:', 'a-c-d:e-f-g', 'a:b:', ]) def test_target_from_invalid_str(self, target_str): with pytest.raises(ValueError): ImpersonateTarget.from_str(target_str) @pytest.mark.parametrize('target,expected', [ (ImpersonateTarget('abc', None, None, None), 'abc'), (ImpersonateTarget('abc', '120', None, None), 'abc-120'), (ImpersonateTarget('abc', '120', 'xyz', None), 'abc-120:xyz'), (ImpersonateTarget('abc', '120', 'xyz', '5'), 'abc-120:xyz-5'), (ImpersonateTarget('abc', None, 'xyz', None), 'abc:xyz'), (ImpersonateTarget('abc', '120', None, None), 'abc-120'), (ImpersonateTarget('abc', '120', 'xyz', None), 'abc-120:xyz'), (ImpersonateTarget('abc', None, 'xyz'), 'abc:xyz'), (ImpersonateTarget(None, None, 'xyz', '6.5'), ':xyz-6.5'), (ImpersonateTarget('abc'), 'abc'), (ImpersonateTarget(None, None, None, None), ''), ]) def test_str(self, target, expected): assert str(target) == expected @pytest.mark.parametrize('args', [ ('abc', None, None, '5'), ('abc', '120', None, '5'), (None, '120', None, None), (None, '120', None, '5'), (None, None, None, '5'), (None, '120', 'xyz', '5'), ]) def test_invalid_impersonate_target(self, args): with pytest.raises(ValueError): ImpersonateTarget(*args) @pytest.mark.parametrize('target1,target2,is_in,is_eq', [ (ImpersonateTarget('abc', None, None, None), ImpersonateTarget('abc', None, None, None), True, True), 
(ImpersonateTarget('abc', None, None, None), ImpersonateTarget('abc', '120', None, None), True, False), (ImpersonateTarget('abc', None, 'xyz', 'test'), ImpersonateTarget('abc', '120', 'xyz', None), True, False), (ImpersonateTarget('abc', '121', 'xyz', 'test'), ImpersonateTarget('abc', '120', 'xyz', 'test'), False, False), (ImpersonateTarget('abc'), ImpersonateTarget('abc', '120', 'xyz', 'test'), True, False), (ImpersonateTarget('abc', '120', 'xyz', 'test'), ImpersonateTarget('abc'), True, False), (ImpersonateTarget(), ImpersonateTarget('abc', '120', 'xyz'), True, False), (ImpersonateTarget(), ImpersonateTarget(), True, True), ]) def test_impersonate_target_in(self, target1, target2, is_in, is_eq): assert (target1 in target2) is is_in assert (target1 == target2) is is_eq
yt-dlp-2024.09.27/test/test_networking_utils.py
#!/usr/bin/env python3 # Allow direct execution import os import sys import pytest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import io import random import ssl from yt_dlp.cookies import YoutubeDLCookieJar from yt_dlp.dependencies import certifi from yt_dlp.networking import Response from yt_dlp.networking._helper import ( InstanceStoreMixin, add_accept_encoding_header, get_redirect_method, make_socks_proxy_opts, select_proxy, ssl_load_certs, ) from yt_dlp.networking.exceptions import ( HTTPError, IncompleteRead, ) from yt_dlp.socks import ProxyType from yt_dlp.utils.networking import HTTPHeaderDict TEST_DIR = os.path.dirname(os.path.abspath(__file__)) class TestNetworkingUtils: def test_select_proxy(self): proxies = { 'all': 'socks5://example.com', 'http': 'http://example.com:1080', 'no': 'bypass.example.com,yt-dl.org', } assert select_proxy('https://example.com', proxies) == proxies['all'] assert select_proxy('http://example.com', proxies) == proxies['http'] assert select_proxy('http://bypass.example.com', proxies) is None assert select_proxy('https://yt-dl.org', proxies) is None @pytest.mark.parametrize('socks_proxy,expected', [ ('socks5h://example.com', { 'proxytype': ProxyType.SOCKS5, 'addr': 'example.com', 'port': 1080, 'rdns': True, 'username': None, 'password': None, }), ('socks5://user:@example.com:5555', { 'proxytype': ProxyType.SOCKS5, 'addr': 'example.com', 'port': 5555, 'rdns': False, 'username': 'user', 'password': '', }), ('socks4://u%40ser:pa%20ss@127.0.0.1:1080', { 'proxytype': ProxyType.SOCKS4, 'addr': '127.0.0.1', 'port': 1080, 'rdns': False, 'username': 'u@ser', 'password': 'pa ss', }), ('socks4a://:pa%20ss@127.0.0.1', { 'proxytype': ProxyType.SOCKS4A, 'addr': '127.0.0.1', 'port': 1080, 'rdns': True, 'username': '', 'password': 'pa ss', }), ]) def test_make_socks_proxy_opts(self, socks_proxy, expected): assert make_socks_proxy_opts(socks_proxy) == expected
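# --- Editor's note: illustrative sketch, not part of the original test file ---
# The parametrized cases above encode the SOCKS proxy URL conventions:
# socks4a/socks5h imply remote DNS resolution (rdns=True), the userinfo part
# is percent-decoded, and the port defaults to 1080. A tiny standalone parser
# with the same observable behaviour (parse_socks_url is a hypothetical
# helper, much simpler than the real make_socks_proxy_opts):

from urllib.parse import unquote, urlparse


def parse_socks_url(url):
    parsed = urlparse(url)
    return {
        'scheme': parsed.scheme,
        'addr': parsed.hostname,
        'port': parsed.port or 1080,  # default SOCKS port
        'rdns': parsed.scheme in ('socks4a', 'socks5h'),  # remote-DNS variants
        'username': unquote(parsed.username) if parsed.username is not None else None,
        'password': unquote(parsed.password) if parsed.password is not None else None,
    }


opts = parse_socks_url('socks4://u%40ser:pa%20ss@127.0.0.1:1080')
assert opts['username'] == 'u@ser' and opts['password'] == 'pa ss'
assert parse_socks_url('socks5h://example.com')['rdns'] is True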
def test_make_socks_proxy_unknown(self): with pytest.raises(ValueError, match='Unknown SOCKS proxy version: socks'): make_socks_proxy_opts('socks://127.0.0.1') @pytest.mark.skipif(not certifi, reason='certifi is not installed') def test_load_certifi(self): context_certifi = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) context_certifi.load_verify_locations(cafile=certifi.where()) context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) ssl_load_certs(context, use_certifi=True) assert context.get_ca_certs() == context_certifi.get_ca_certs() context_default = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) context_default.load_default_certs() context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) ssl_load_certs(context, use_certifi=False) assert context.get_ca_certs() == context_default.get_ca_certs() if context_default.get_ca_certs() == context_certifi.get_ca_certs(): pytest.skip('System uses certifi as default. The test is not valid') @pytest.mark.parametrize('method,status,expected', [ ('GET', 303, 'GET'), ('HEAD', 303, 'HEAD'), ('PUT', 303, 'GET'), ('POST', 301, 'GET'), ('HEAD', 301, 'HEAD'), ('POST', 302, 'GET'), ('HEAD', 302, 'HEAD'), ('PUT', 302, 'PUT'), ('POST', 308, 'POST'), ('POST', 307, 'POST'), ('HEAD', 308, 'HEAD'), ('HEAD', 307, 'HEAD'), ]) def test_get_redirect_method(self, method, status, expected): assert get_redirect_method(method, status) == expected @pytest.mark.parametrize('headers,supported_encodings,expected', [ ({'Accept-Encoding': 'br'}, ['gzip', 'br'], {'Accept-Encoding': 'br'}), ({}, ['gzip', 'br'], {'Accept-Encoding': 'gzip, br'}), ({'Content-type': 'application/json'}, [], {'Content-type': 'application/json', 'Accept-Encoding': 'identity'}), ]) def test_add_accept_encoding_header(self, headers, supported_encodings, expected): headers = HTTPHeaderDict(headers) add_accept_encoding_header(headers, supported_encodings) assert headers == HTTPHeaderDict(expected) class TestInstanceStoreMixin: class FakeInstanceStoreMixin(InstanceStoreMixin): def _create_instance(self, **kwargs): return random.randint(0, 1000000) def _close_instance(self, instance): pass def test_mixin(self): mixin = self.FakeInstanceStoreMixin() assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) == mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'e', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) assert mixin._get_instance(d={'a': 1, 'b': 2, 'c': {'d', 4}}) != mixin._get_instance(d={'a': 1, 'b': 2, 'g': {'d', 4}}) assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) == mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) assert mixin._get_instance(d={'a': 1}, e=[1, 2, 3]) != mixin._get_instance(d={'a': 1}, e=[1, 2, 3, 4]) cookiejar = YoutubeDLCookieJar() assert mixin._get_instance(b=[1, 2], c=cookiejar) == mixin._get_instance(b=[1, 2], c=cookiejar) assert mixin._get_instance(b=[1, 2], c=cookiejar) != mixin._get_instance(b=[1, 2], c=YoutubeDLCookieJar()) # Different order assert mixin._get_instance(c=cookiejar, b=[1, 2]) == mixin._get_instance(b=[1, 2], c=cookiejar) m = mixin._get_instance(t=1234) assert mixin._get_instance(t=1234) == m mixin._clear_instances() assert mixin._get_instance(t=1234) != m class TestNetworkingExceptions: @staticmethod def create_response(status): return Response(fp=io.BytesIO(b'test'), url='http://example.com', headers={'tesT': 'test'}, status=status) def test_http_error(self): response = self.create_response(403) error = HTTPError(response) assert error.status == 403 assert str(error) == error.msg == 'HTTP Error 403: Forbidden' assert error.reason == response.reason assert error.response is response data = error.response.read() assert data == b'test' assert repr(error) == '<HTTPError 403: Forbidden>' def test_redirect_http_error(self): response = self.create_response(301) error = HTTPError(response, redirect_loop=True) assert str(error) == error.msg == 'HTTP Error 301: Moved Permanently (redirect loop detected)' assert error.reason == 'Moved Permanently' def test_incomplete_read_error(self): error = IncompleteRead(4, 3, cause='test') assert isinstance(error, IncompleteRead) assert repr(error) == '<IncompleteRead: 4 bytes read, 3 more expected>' assert str(error) == error.msg == '4 bytes read, 3 more expected' assert error.partial == 4 assert error.expected == 3 assert error.cause == 'test' error = IncompleteRead(3) assert repr(error) == '<IncompleteRead: 3 bytes read>' assert str(error) == '3 bytes read'
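# --- Editor's note: illustrative sketch, not part of the original test file ---
# TestInstanceStoreMixin above depends on instances being cached per unique
# combination of keyword arguments, including unhashable values such as dicts
# and lists, and on _clear_instances() dropping the cache. A minimal
# standalone cache with the same observable behaviour (TinyInstanceStore is a
# hypothetical class; the real mixin handles more value types, e.g. cookiejars):

class TinyInstanceStore:
    def __init__(self):
        self._instances = {}

    def _make_key(self, **kwargs):
        # repr of the sorted items is a crude but sufficient way to key on
        # unhashable kwargs for this sketch, and makes the key order-insensitive
        return repr(sorted(kwargs.items()))

    def get_instance(self, **kwargs):
        key = self._make_key(**kwargs)
        if key not in self._instances:
            self._instances[key] = object()  # stand-in for _create_instance()
        return self._instances[key]

    def clear_instances(self):
        self._instances.clear()


store = TinyInstanceStore()
assert store.get_instance(a=1, b=[1, 2]) is store.get_instance(b=[1, 2], a=1)  # different order, same instance
assert store.get_instance(a=1) is not store.get_instance(a=2)
old = store.get_instance(t=1234)
store.clear_instances()
assert store.get_instance(t=1234) is not old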
yt-dlp-2024.09.27/test/test_overwrites.py
#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import subprocess from test.helper import is_download_test, try_rm root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) download_file = os.path.join(root_dir, 'test.webm') @is_download_test class TestOverwrites(unittest.TestCase): def setUp(self): # create an empty file open(download_file, 'a').close() def test_default_overwrites(self): outp = subprocess.Popen( [ sys.executable, 'yt_dlp/__main__.py', '-o', 'test.webm', 'https://www.youtube.com/watch?v=jNQXAC9IVRw', ], cwd=root_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) sout, serr = outp.communicate() self.assertTrue(b'has already been downloaded' in sout) # if the file has no content, it has not been redownloaded self.assertTrue(os.path.getsize(download_file) < 1) def test_yes_overwrites(self): outp = subprocess.Popen( [ sys.executable, 'yt_dlp/__main__.py', '--yes-overwrites', '-o', 'test.webm', 'https://www.youtube.com/watch?v=jNQXAC9IVRw', ], cwd=root_dir, stdout=subprocess.PIPE, stderr=subprocess.PIPE) sout, serr = outp.communicate() self.assertTrue(b'has already been downloaded' not in sout) # if the file has content, it has been redownloaded self.assertTrue(os.path.getsize(download_file) > 1) def tearDown(self): try_rm(os.path.join(root_dir, 'test.webm')) if __name__ == '__main__': unittest.main()
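# --- Editor's note: illustrative sketch, not part of the original test file ---
# The overwrite tests above drive the program as a subprocess and assert on
# both its captured stdout and the size of the file it leaves behind. The
# capture half of that pattern, reduced to a self-contained helper (the
# command below is a trivial placeholder, not a yt-dlp invocation):

import subprocess
import sys


def run_and_capture(args, cwd=None):
    """Run a command, returning (stdout, stderr, returncode)."""
    proc = subprocess.Popen(
        args, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    return stdout, stderr, proc.returncode


stdout, _, code = run_and_capture([sys.executable, '-c', 'print("ok")'])
assert b'ok' in stdout and code == 0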
yt-dlp-2024.09.27/test/test_plugins.py
import importlib import os import shutil import sys import unittest from pathlib import Path sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) TEST_DATA_DIR = Path(os.path.dirname(os.path.abspath(__file__)), 'testdata') sys.path.append(str(TEST_DATA_DIR)) importlib.invalidate_caches() from yt_dlp.plugins import PACKAGE_NAME, directories, load_plugins class TestPlugins(unittest.TestCase): TEST_PLUGIN_DIR = TEST_DATA_DIR / PACKAGE_NAME def test_directories_containing_plugins(self): self.assertIn(self.TEST_PLUGIN_DIR, map(Path, directories())) def test_extractor_classes(self): for module_name in tuple(sys.modules): if module_name.startswith(f'{PACKAGE_NAME}.extractor'): del sys.modules[module_name] plugins_ie = load_plugins('extractor', 'IE') self.assertIn(f'{PACKAGE_NAME}.extractor.normal', sys.modules.keys()) self.assertIn('NormalPluginIE', plugins_ie.keys()) # don't load modules with underscore prefix self.assertFalse( f'{PACKAGE_NAME}.extractor._ignore' in sys.modules, 'loaded module beginning with underscore') self.assertNotIn('IgnorePluginIE', plugins_ie.keys()) # Don't load extractors with underscore prefix self.assertNotIn('_IgnoreUnderscorePluginIE', plugins_ie.keys()) # Don't load extractors not specified in __all__ (if supplied) self.assertNotIn('IgnoreNotInAllPluginIE', plugins_ie.keys()) self.assertIn('InAllPluginIE', plugins_ie.keys()) def test_postprocessor_classes(self): plugins_pp = load_plugins('postprocessor', 'PP') self.assertIn('NormalPluginPP', plugins_pp.keys()) def test_importing_zipped_module(self): zip_path = TEST_DATA_DIR / 'zipped_plugins.zip' shutil.make_archive(str(zip_path)[:-4], 'zip', str(zip_path)[:-4]) sys.path.append(str(zip_path)) # add zip to search paths importlib.invalidate_caches() # reset the import caches try: for plugin_type in ('extractor', 'postprocessor'): package = importlib.import_module(f'{PACKAGE_NAME}.{plugin_type}') self.assertIn(zip_path / PACKAGE_NAME / plugin_type, map(Path, package.__path__)) plugins_ie = load_plugins('extractor', 'IE') self.assertIn('ZippedPluginIE', plugins_ie.keys()) plugins_pp = load_plugins('postprocessor', 'PP') self.assertIn('ZippedPluginPP', plugins_pp.keys()) finally: sys.path.remove(str(zip_path)) os.remove(zip_path) importlib.invalidate_caches() # reset the import caches if __name__ == '__main__': unittest.main()
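# --- Editor's note: illustrative sketch, not part of the original test file ---
# test_importing_zipped_module above relies on Python's zipimport machinery:
# any .zip file appended to sys.path is searched for importable modules just
# like a directory, provided the import caches are invalidated after the path
# change. A standalone demonstration with a throwaway zip and a hypothetical
# module name:

import importlib
import os
import sys
import tempfile
import zipfile

with tempfile.TemporaryDirectory() as tmp:
    zip_path = os.path.join(tmp, 'bundle.zip')
    with zipfile.ZipFile(zip_path, 'w') as zf:
        zf.writestr('zipped_demo_module.py', 'VALUE = 42\n')
    sys.path.append(zip_path)
    importlib.invalidate_caches()  # required after modifying sys.path, as in the test
    try:
        mod = importlib.import_module('zipped_demo_module')
        assert mod.VALUE == 42
    finally:
        sys.path.remove(zip_path)
        del sys.modules['zipped_demo_module']
        importlib.invalidate_caches()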
yt-dlp-2024.09.27/test/test_post_hooks.py
#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import get_params, is_download_test, try_rm import yt_dlp.YoutubeDL # isort: split from yt_dlp.utils import DownloadError class YoutubeDL(yt_dlp.YoutubeDL): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.to_stderr = self.to_screen TEST_ID = 'gr51aVj-mLg' EXPECTED_NAME = 'gr51aVj-mLg' @is_download_test class TestPostHooks(unittest.TestCase): def setUp(self): self.stored_name_1 = None self.stored_name_2 = None self.params = get_params({ 'skip_download': False, 'writeinfojson': False, 'quiet': True, 'verbose': False, 'cachedir': False, }) self.files = [] def test_post_hooks(self): self.params['post_hooks'] = [self.hook_one, self.hook_two] ydl = YoutubeDL(self.params) ydl.download([TEST_ID]) self.assertEqual(self.stored_name_1, EXPECTED_NAME, 'Not the expected name from hook 1') self.assertEqual(self.stored_name_2, EXPECTED_NAME, 'Not the expected name from hook 2') def test_post_hook_exception(self): self.params['post_hooks'] = [self.hook_three] ydl = YoutubeDL(self.params) self.assertRaises(DownloadError, ydl.download, [TEST_ID]) def hook_one(self, filename): self.stored_name_1, _ = os.path.splitext(os.path.basename(filename)) self.files.append(filename) def hook_two(self, filename): self.stored_name_2, _ = os.path.splitext(os.path.basename(filename)) self.files.append(filename) def hook_three(self, filename): self.files.append(filename) raise Exception(f'Test exception for \'{filename}\'') def tearDown(self): for f in self.files: try_rm(f) if __name__ == '__main__': unittest.main()
yt-dlp-2024.09.27/test/test_postprocessors.py
#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp import YoutubeDL from yt_dlp.utils import shell_quote from yt_dlp.postprocessor import ( ExecPP, FFmpegThumbnailsConvertorPP, MetadataFromFieldPP, MetadataParserPP, ModifyChaptersPP, SponsorBlockPP, ) class TestMetadataFromField(unittest.TestCase): def test_format_to_regex(self): 
self.assertEqual( MetadataParserPP.format_to_regex('%(title)s - %(artist)s'), r'(?P<title>.+)\ \-\ (?P<artist>.+)') self.assertEqual(MetadataParserPP.format_to_regex(r'(?P<x>.+)'), r'(?P<x>.+)') def test_field_to_template(self): self.assertEqual(MetadataParserPP.field_to_template('title'), '%(title)s') self.assertEqual(MetadataParserPP.field_to_template('1'), '1') self.assertEqual(MetadataParserPP.field_to_template('foo bar'), 'foo bar') self.assertEqual(MetadataParserPP.field_to_template(' literal'), ' literal') def test_metadatafromfield(self): self.assertEqual( MetadataFromFieldPP.to_action('%(title)s \\: %(artist)s:%(title)s : %(artist)s'), (MetadataParserPP.Actions.INTERPRET, '%(title)s : %(artist)s', '%(title)s : %(artist)s')) class TestConvertThumbnail(unittest.TestCase): def test_escaping(self): pp = FFmpegThumbnailsConvertorPP() if not pp.available: print('Skipping: ffmpeg not found') return file = 'test/testdata/thumbnails/foo %d bar/foo_%d.{}' tests = (('webp', 'png'), ('png', 'jpg')) for inp, out in tests: out_file = file.format(out) if os.path.exists(out_file): os.remove(out_file) pp.convert_thumbnail(file.format(inp), out) assert os.path.exists(out_file) for _, out in tests: os.remove(file.format(out)) class TestExec(unittest.TestCase): def test_parse_cmd(self): pp = ExecPP(YoutubeDL(), '') info = {'filepath': 'file name'} cmd = 'echo {}'.format(shell_quote(info['filepath'])) self.assertEqual(pp.parse_cmd('echo', info), cmd) self.assertEqual(pp.parse_cmd('echo {}', info), cmd) self.assertEqual(pp.parse_cmd('echo %(filepath)q', info), cmd) class TestModifyChaptersPP(unittest.TestCase): def setUp(self): self._pp = ModifyChaptersPP(YoutubeDL()) @staticmethod def _sponsor_chapter(start, end, cat, remove=False, title=None): if title is None: title = SponsorBlockPP.CATEGORIES[cat] return { 'start_time': start, 'end_time': end, '_categories': [(cat, start, end, title)], **({'remove': True} if remove else {}), } @staticmethod def _chapter(start, end, title=None, remove=False): c = {'start_time': start, 'end_time': end} if title is not None: c['title'] = title if remove: c['remove'] = True return c def _chapters(self, ends, titles): self.assertEqual(len(ends), len(titles)) start = 0 chapters = [] for e, t in zip(ends, titles): chapters.append(self._chapter(start, e, t)) start = e return chapters def _remove_marked_arrange_sponsors_test_impl( self, chapters, expected_chapters, expected_removed): actual_chapters, actual_removed = ( self._pp._remove_marked_arrange_sponsors(chapters)) for c in actual_removed: c.pop('title', None) c.pop('_categories', None) actual_chapters = [{ 'start_time': c['start_time'], 'end_time': c['end_time'], 'title': c['title'], } for c in actual_chapters] self.assertSequenceEqual(expected_chapters, actual_chapters) self.assertSequenceEqual(expected_removed, actual_removed) def test_remove_marked_arrange_sponsors_CanGetThroughUnaltered(self): chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, []) def test_remove_marked_arrange_sponsors_ChapterWithSponsors(self): chapters = [ *self._chapters([70], ['c']), self._sponsor_chapter(10, 20, 'sponsor'), self._sponsor_chapter(30, 40, 'preview'), self._sponsor_chapter(50, 60, 'filler')] expected = self._chapters( [10, 20, 30, 40, 50, 60, 70], ['c', '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Preview/Recap', 'c', '[SponsorBlock]: Filler Tangent', 'c']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def 
test_remove_marked_arrange_sponsors_SponsorBlockChapters(self): chapters = [ *self._chapters([70], ['c']), self._sponsor_chapter(10, 20, 'chapter', title='sb c1'), self._sponsor_chapter(15, 16, 'chapter', title='sb c2'), self._sponsor_chapter(30, 40, 'preview'), self._sponsor_chapter(50, 60, 'filler')] expected = self._chapters( [10, 15, 16, 20, 30, 40, 50, 60, 70], ['c', '[SponsorBlock]: sb c1', '[SponsorBlock]: sb c1, sb c2', '[SponsorBlock]: sb c1', 'c', '[SponsorBlock]: Preview/Recap', 'c', '[SponsorBlock]: Filler Tangent', 'c']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_UniqueNamesForOverlappingSponsors(self): chapters = [ *self._chapters([120], ['c']), self._sponsor_chapter(10, 45, 'sponsor'), self._sponsor_chapter(20, 40, 'selfpromo'), self._sponsor_chapter(50, 70, 'sponsor'), self._sponsor_chapter(60, 85, 'selfpromo'), self._sponsor_chapter(90, 120, 'selfpromo'), self._sponsor_chapter(100, 110, 'sponsor')] expected = self._chapters( [10, 20, 40, 45, 50, 60, 70, 85, 90, 100, 110, 120], ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion', 'c', '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion, Sponsor', '[SponsorBlock]: Unpaid/Self Promotion']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_ChapterWithCuts(self): cuts = [self._chapter(10, 20, remove=True), self._sponsor_chapter(30, 40, 'sponsor', remove=True), self._chapter(50, 60, remove=True)] chapters = self._chapters([70], ['c']) + cuts self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([40], ['c']), cuts) def test_remove_marked_arrange_sponsors_ChapterWithSponsorsAndCuts(self): chapters = [ *self._chapters([70], ['c']), self._sponsor_chapter(10, 20, 'sponsor'), self._sponsor_chapter(30, 40, 'selfpromo', remove=True), self._sponsor_chapter(50, 60, 'interaction')] expected = self._chapters([10, 20, 40, 50, 60], ['c', '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Interaction Reminder', 'c']) self._remove_marked_arrange_sponsors_test_impl( chapters, expected, [self._chapter(30, 40, remove=True)]) def test_remove_marked_arrange_sponsors_ChapterWithSponsorCutInTheMiddle(self): cuts = [self._sponsor_chapter(20, 30, 'selfpromo', remove=True), self._chapter(40, 50, remove=True)] chapters = [ *self._chapters([70], ['c']), self._sponsor_chapter(10, 60, 'sponsor'), *cuts] expected = self._chapters( [10, 40, 50], ['c', '[SponsorBlock]: Sponsor', 'c']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_ChapterWithCutHidingSponsor(self): cuts = [self._sponsor_chapter(20, 50, 'selfpromo', remove=True)] chapters = [ *self._chapters([60], ['c']), self._sponsor_chapter(10, 20, 'intro'), self._sponsor_chapter(30, 40, 'sponsor'), self._sponsor_chapter(50, 60, 'outro'), *cuts] expected = self._chapters( [10, 20, 30], ['c', '[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_ChapterWithAdjacentSponsors(self): chapters = [ *self._chapters([70], ['c']), self._sponsor_chapter(10, 20, 'sponsor'), self._sponsor_chapter(20, 30, 'selfpromo'), self._sponsor_chapter(30, 40, 'interaction')] 
expected = self._chapters( [10, 20, 30, 40, 70], ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Interaction Reminder', 'c']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_ChapterWithAdjacentCuts(self): chapters = [ *self._chapters([70], ['c']), self._sponsor_chapter(10, 20, 'sponsor'), self._sponsor_chapter(20, 30, 'interaction', remove=True), self._chapter(30, 40, remove=True), self._sponsor_chapter(40, 50, 'selfpromo', remove=True), self._sponsor_chapter(50, 60, 'interaction')] expected = self._chapters([10, 20, 30, 40], ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Interaction Reminder', 'c']) self._remove_marked_arrange_sponsors_test_impl( chapters, expected, [self._chapter(20, 50, remove=True)]) def test_remove_marked_arrange_sponsors_ChapterWithOverlappingSponsors(self): chapters = [ *self._chapters([70], ['c']), self._sponsor_chapter(10, 30, 'sponsor'), self._sponsor_chapter(20, 50, 'selfpromo'), self._sponsor_chapter(40, 60, 'interaction')] expected = self._chapters( [10, 20, 30, 40, 50, 60, 70], ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion, Interaction Reminder', '[SponsorBlock]: Interaction Reminder', 'c']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_ChapterWithOverlappingCuts(self): chapters = [ *self._chapters([70], ['c']), self._sponsor_chapter(10, 30, 'sponsor', remove=True), self._sponsor_chapter(20, 50, 'selfpromo', remove=True), self._sponsor_chapter(40, 60, 'interaction', remove=True)] self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([20], ['c']), [self._chapter(10, 60, remove=True)]) def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsors(self): chapters = [ *self._chapters([170], ['c']), self._sponsor_chapter(0, 30, 'intro'), self._sponsor_chapter(20, 50, 'sponsor'), self._sponsor_chapter(40, 60, 'selfpromo'), self._sponsor_chapter(70, 90, 'sponsor'), self._sponsor_chapter(80, 100, 'sponsor'), self._sponsor_chapter(90, 110, 'sponsor'), self._sponsor_chapter(120, 140, 'selfpromo'), self._sponsor_chapter(130, 160, 'interaction'), self._sponsor_chapter(150, 170, 'outro')] expected = self._chapters( [20, 30, 40, 50, 60, 70, 110, 120, 130, 140, 150, 160, 170], ['[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Intermission/Intro Animation, Sponsor', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion', 'c', '[SponsorBlock]: Sponsor', 'c', '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion, Interaction Reminder', '[SponsorBlock]: Interaction Reminder', '[SponsorBlock]: Interaction Reminder, Endcards/Credits', '[SponsorBlock]: Endcards/Credits']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingCuts(self): chapters = [ *self._chapters([170], ['c']), self._chapter(0, 30, remove=True), self._sponsor_chapter(20, 50, 'sponsor', remove=True), self._chapter(40, 60, remove=True), self._sponsor_chapter(70, 90, 'sponsor', remove=True), self._chapter(80, 100, remove=True), self._chapter(90, 110, remove=True), self._sponsor_chapter(120, 140, 'sponsor', remove=True), self._sponsor_chapter(130, 160, 'selfpromo', remove=True), self._chapter(150, 170, 
remove=True)] expected_cuts = [self._chapter(0, 60, remove=True), self._chapter(70, 110, remove=True), self._chapter(120, 170, remove=True)] self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([20], ['c']), expected_cuts) def test_remove_marked_arrange_sponsors_OverlappingSponsorsDifferentTitlesAfterCut(self): chapters = [ *self._chapters([60], ['c']), self._sponsor_chapter(10, 60, 'sponsor'), self._sponsor_chapter(10, 40, 'intro'), self._sponsor_chapter(30, 50, 'interaction'), self._sponsor_chapter(30, 50, 'selfpromo', remove=True), self._sponsor_chapter(40, 50, 'interaction'), self._sponsor_chapter(50, 60, 'outro')] expected = self._chapters( [10, 30, 40], ['c', '[SponsorBlock]: Sponsor, Intermission/Intro Animation', '[SponsorBlock]: Sponsor, Endcards/Credits']) self._remove_marked_arrange_sponsors_test_impl( chapters, expected, [self._chapter(30, 50, remove=True)]) def test_remove_marked_arrange_sponsors_SponsorsNoLongerOverlapAfterCut(self): chapters = [ *self._chapters([70], ['c']), self._sponsor_chapter(10, 30, 'sponsor'), self._sponsor_chapter(20, 50, 'interaction'), self._sponsor_chapter(30, 50, 'selfpromo', remove=True), self._sponsor_chapter(40, 60, 'sponsor'), self._sponsor_chapter(50, 60, 'interaction')] expected = self._chapters( [10, 20, 40, 50], ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Interaction Reminder', 'c']) self._remove_marked_arrange_sponsors_test_impl( chapters, expected, [self._chapter(30, 50, remove=True)]) def test_remove_marked_arrange_sponsors_SponsorsStillOverlapAfterCut(self): chapters = [ *self._chapters([70], ['c']), self._sponsor_chapter(10, 60, 'sponsor'), self._sponsor_chapter(20, 60, 'interaction'), self._sponsor_chapter(30, 50, 'selfpromo', remove=True)] expected = self._chapters( [10, 20, 40, 50], ['c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Interaction Reminder', 'c']) self._remove_marked_arrange_sponsors_test_impl( chapters, expected, [self._chapter(30, 50, remove=True)]) def test_remove_marked_arrange_sponsors_ChapterWithRunsOfOverlappingSponsorsAndCuts(self): chapters = [ *self._chapters([200], ['c']), self._sponsor_chapter(10, 40, 'sponsor'), self._sponsor_chapter(10, 30, 'intro'), self._chapter(20, 30, remove=True), self._sponsor_chapter(30, 40, 'selfpromo'), self._sponsor_chapter(50, 70, 'sponsor'), self._sponsor_chapter(60, 80, 'interaction'), self._chapter(70, 80, remove=True), self._sponsor_chapter(70, 90, 'sponsor'), self._sponsor_chapter(80, 100, 'interaction'), self._sponsor_chapter(120, 170, 'selfpromo'), self._sponsor_chapter(130, 180, 'outro'), self._chapter(140, 150, remove=True), self._chapter(150, 160, remove=True)] expected = self._chapters( [10, 20, 30, 40, 50, 70, 80, 100, 110, 130, 140, 160], ['c', '[SponsorBlock]: Sponsor, Intermission/Intro Animation', '[SponsorBlock]: Sponsor, Unpaid/Self Promotion', 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Sponsor, Interaction Reminder', '[SponsorBlock]: Interaction Reminder', 'c', '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Unpaid/Self Promotion, Endcards/Credits', '[SponsorBlock]: Endcards/Credits', 'c']) expected_cuts = [self._chapter(20, 30, remove=True), self._chapter(70, 80, remove=True), self._chapter(140, 160, remove=True)] self._remove_marked_arrange_sponsors_test_impl(chapters, expected, expected_cuts) def test_remove_marked_arrange_sponsors_SponsorOverlapsMultipleChapters(self): chapters = [ *self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5']), self._sponsor_chapter(10, 90, 'sponsor')] 
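        # The sponsor (10-90) fully covers c2-c4 and truncates c1 and c5, so the
        # expectation below keeps only the remainders of c1 (0-10) and c5 (90-100)
        # around a single merged SponsorBlock chapter.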
expected = self._chapters([10, 90, 100], ['c1', '[SponsorBlock]: Sponsor', 'c5']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_CutOverlapsMultipleChapters(self): cuts = [self._chapter(10, 90, remove=True)] chapters = self._chapters([20, 40, 60, 80, 100], ['c1', 'c2', 'c3', 'c4', 'c5']) + cuts expected = self._chapters([10, 20], ['c1', 'c5']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorsWithinSomeChaptersAndOverlappingOthers(self): chapters = [ *self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4']), self._sponsor_chapter(20, 30, 'sponsor'), self._sponsor_chapter(50, 70, 'selfpromo')] expected = self._chapters([10, 20, 30, 40, 50, 70, 80], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c2', 'c3', '[SponsorBlock]: Unpaid/Self Promotion', 'c4']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_CutsWithinSomeChaptersAndOverlappingOthers(self): cuts = [self._chapter(20, 30, remove=True), self._chapter(50, 70, remove=True)] chapters = self._chapters([10, 40, 60, 80], ['c1', 'c2', 'c3', 'c4']) + cuts expected = self._chapters([10, 30, 40, 50], ['c1', 'c2', 'c3', 'c4']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_ChaptersAfterLastSponsor(self): chapters = [ *self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4']), self._sponsor_chapter(10, 30, 'music_offtopic')] expected = self._chapters( [10, 30, 40, 50, 60], ['c1', '[SponsorBlock]: Non-Music Section', 'c2', 'c3', 'c4']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_ChaptersAfterLastCut(self): cuts = [self._chapter(10, 30, remove=True)] chapters = self._chapters([20, 40, 50, 60], ['c1', 'c2', 'c3', 'c4']) + cuts expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorStartsAtChapterStart(self): chapters = [ *self._chapters([10, 20, 40], ['c1', 'c2', 'c3']), self._sponsor_chapter(20, 30, 'sponsor')] expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_CutStartsAtChapterStart(self): cuts = [self._chapter(20, 30, remove=True)] chapters = self._chapters([10, 20, 40], ['c1', 'c2', 'c3']) + cuts expected = self._chapters([10, 20, 30], ['c1', 'c2', 'c3']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorEndsAtChapterEnd(self): chapters = [ *self._chapters([10, 30, 40], ['c1', 'c2', 'c3']), self._sponsor_chapter(20, 30, 'sponsor')] expected = self._chapters([10, 20, 30, 40], ['c1', 'c2', '[SponsorBlock]: Sponsor', 'c3']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_CutEndsAtChapterEnd(self): cuts = [self._chapter(20, 30, remove=True)] chapters = self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) + cuts expected = self._chapters([10, 20, 30], ['c1', 'c2', 'c3']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorCoincidesWithChapters(self): chapters = [ *self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']), self._sponsor_chapter(10, 30, 
'sponsor')] expected = self._chapters([10, 30, 40], ['c1', '[SponsorBlock]: Sponsor', 'c4']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_CutCoincidesWithChapters(self): cuts = [self._chapter(10, 30, remove=True)] chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + cuts expected = self._chapters([10, 20], ['c1', 'c4']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorsAtVideoBoundaries(self): chapters = [ *self._chapters([20, 40, 60], ['c1', 'c2', 'c3']), self._sponsor_chapter(0, 10, 'intro'), self._sponsor_chapter(50, 60, 'outro')] expected = self._chapters( [10, 20, 40, 50, 60], ['[SponsorBlock]: Intermission/Intro Animation', 'c1', 'c2', 'c3', '[SponsorBlock]: Endcards/Credits']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_CutsAtVideoBoundaries(self): cuts = [self._chapter(0, 10, remove=True), self._chapter(50, 60, remove=True)] chapters = self._chapters([20, 40, 60], ['c1', 'c2', 'c3']) + cuts expected = self._chapters([10, 30, 40], ['c1', 'c2', 'c3']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_SponsorsOverlapChaptersAtVideoBoundaries(self): chapters = [ *self._chapters([10, 40, 50], ['c1', 'c2', 'c3']), self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(30, 50, 'outro')] expected = self._chapters( [20, 30, 50], ['[SponsorBlock]: Intermission/Intro Animation', 'c2', '[SponsorBlock]: Endcards/Credits']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_CutsOverlapChaptersAtVideoBoundaries(self): cuts = [self._chapter(0, 20, remove=True), self._chapter(30, 50, remove=True)] chapters = self._chapters([10, 40, 50], ['c1', 'c2', 'c3']) + cuts expected = self._chapters([10], ['c2']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, cuts) def test_remove_marked_arrange_sponsors_EverythingSponsored(self): chapters = [ *self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']), self._sponsor_chapter(0, 20, 'intro'), self._sponsor_chapter(20, 40, 'outro')] expected = self._chapters([20, 40], ['[SponsorBlock]: Intermission/Intro Animation', '[SponsorBlock]: Endcards/Credits']) self._remove_marked_arrange_sponsors_test_impl(chapters, expected, []) def test_remove_marked_arrange_sponsors_EverythingCut(self): cuts = [self._chapter(0, 20, remove=True), self._chapter(20, 40, remove=True)] chapters = self._chapters([10, 20, 30, 40], ['c1', 'c2', 'c3', 'c4']) + cuts self._remove_marked_arrange_sponsors_test_impl( chapters, [], [self._chapter(0, 40, remove=True)]) def test_remove_marked_arrange_sponsors_TinyChaptersInTheOriginalArePreserved(self): chapters = self._chapters([0.1, 0.2, 0.3, 0.4], ['c1', 'c2', 'c3', 'c4']) self._remove_marked_arrange_sponsors_test_impl(chapters, chapters, []) def test_remove_marked_arrange_sponsors_TinySponsorsAreIgnored(self): chapters = [self._sponsor_chapter(0, 0.1, 'intro'), self._chapter(0.1, 0.2, 'c1'), self._sponsor_chapter(0.2, 0.3, 'sponsor'), self._chapter(0.3, 0.4, 'c2'), self._sponsor_chapter(0.4, 0.5, 'outro')] self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([0.3, 0.5], ['c1', 'c2']), []) def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromCutsAreIgnored(self): cuts = [self._chapter(1.5, 2.5, remove=True)] chapters = self._chapters([2, 3, 3.5], 
['c1', 'c2', 'c3']) + cuts self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([2, 2.5], ['c1', 'c3']), cuts) def test_remove_marked_arrange_sponsors_SingleTinyChapterIsPreserved(self): cuts = [self._chapter(0.5, 2, remove=True)] chapters = self._chapters([2], ['c']) + cuts self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([0.5], ['c']), cuts) def test_remove_marked_arrange_sponsors_TinyChapterAtTheStartPrependedToTheNext(self): cuts = [self._chapter(0.5, 2, remove=True)] chapters = self._chapters([2, 4], ['c1', 'c2']) + cuts self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([2.5], ['c2']), cuts) def test_remove_marked_arrange_sponsors_TinyChaptersResultingFromSponsorOverlapAreIgnored(self): chapters = [ *self._chapters([1, 3, 4], ['c1', 'c2', 'c3']), self._sponsor_chapter(1.5, 2.5, 'sponsor')] self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([1.5, 2.5, 4], ['c1', '[SponsorBlock]: Sponsor', 'c3']), []) def test_remove_marked_arrange_sponsors_TinySponsorsOverlapsAreIgnored(self): chapters = [ *self._chapters([2, 3, 5], ['c1', 'c2', 'c3']), self._sponsor_chapter(1, 3, 'sponsor'), self._sponsor_chapter(2.5, 4, 'selfpromo')] self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([1, 3, 4, 5], [ 'c1', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', 'c3']), []) def test_remove_marked_arrange_sponsors_TinySponsorsPrependedToTheNextSponsor(self): chapters = [ *self._chapters([4], ['c']), self._sponsor_chapter(1.5, 2, 'sponsor'), self._sponsor_chapter(2, 4, 'selfpromo')] self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([1.5, 4], ['c', '[SponsorBlock]: Unpaid/Self Promotion']), []) def test_remove_marked_arrange_sponsors_SmallestSponsorInTheOverlapGetsNamed(self): self._pp._sponsorblock_chapter_title = '[SponsorBlock]: %(name)s' chapters = [ *self._chapters([10], ['c']), self._sponsor_chapter(2, 8, 'sponsor'), self._sponsor_chapter(4, 6, 'selfpromo')] self._remove_marked_arrange_sponsors_test_impl( chapters, self._chapters([2, 4, 6, 8, 10], [ 'c', '[SponsorBlock]: Sponsor', '[SponsorBlock]: Unpaid/Self Promotion', '[SponsorBlock]: Sponsor', 'c', ]), []) def test_make_concat_opts_CommonCase(self): sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')] expected = '''ffconcat version 1.0 file 'file:test' outpoint 1.000000 file 'file:test' inpoint 2.000000 outpoint 10.000000 file 'file:test' inpoint 20.000000 ''' opts = self._pp._make_concat_opts(sponsor_chapters, 30) self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts))) def test_make_concat_opts_NoZeroDurationChunkAtVideoStart(self): sponsor_chapters = [self._chapter(0, 1, 's1'), self._chapter(10, 20, 's2')] expected = '''ffconcat version 1.0 file 'file:test' inpoint 1.000000 outpoint 10.000000 file 'file:test' inpoint 20.000000 ''' opts = self._pp._make_concat_opts(sponsor_chapters, 30) self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts))) def test_make_concat_opts_NoZeroDurationChunkAtVideoEnd(self): sponsor_chapters = [self._chapter(1, 2, 's1'), self._chapter(10, 20, 's2')] expected = '''ffconcat version 1.0 file 'file:test' outpoint 1.000000 file 'file:test' inpoint 2.000000 outpoint 10.000000 ''' opts = self._pp._make_concat_opts(sponsor_chapters, 20) self.assertEqual(expected, ''.join(self._pp._concat_spec(['test'] * len(opts), opts))) def test_quote_for_concat_RunsOfQuotes(self): self.assertEqual( r"'special 
'\'' '\'\''characters'\'\'\''galore'",
            self._pp._quote_for_ffmpeg("special ' ''characters'''galore"))

    def test_quote_for_concat_QuotesAtStart(self):
        self.assertEqual(
            r"\'\'\''special '\'' characters '\'' galore'",
            self._pp._quote_for_ffmpeg("'''special ' characters ' galore"))

    def test_quote_for_concat_QuotesAtEnd(self):
        self.assertEqual(
            r"'special '\'' characters '\'' galore'\'\'\'",
            self._pp._quote_for_ffmpeg("special ' characters ' galore'''"))
yt-dlp-2024.09.27/test/test_socks.py
#!/usr/bin/env python3
# Allow direct execution
import os
import sys
import threading
import unittest

import pytest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import abc
import contextlib
import enum
import functools
import http.server
import json
import random
import socket
import struct
import time
from socketserver import (
    BaseRequestHandler,
    StreamRequestHandler,
    ThreadingTCPServer,
)

from test.helper import http_server_port, verify_address_availability
from yt_dlp.networking import Request
from yt_dlp.networking.exceptions import ProxyError, TransportError
from yt_dlp.socks import (
    SOCKS4_REPLY_VERSION,
    SOCKS4_VERSION,
    SOCKS5_USER_AUTH_SUCCESS,
    SOCKS5_USER_AUTH_VERSION,
    SOCKS5_VERSION,
    Socks5AddressType,
    Socks5Auth,
)

SOCKS5_USER_AUTH_FAILURE = 0x1


class Socks4CD(enum.IntEnum):
    REQUEST_GRANTED = 90
    REQUEST_REJECTED_OR_FAILED = 91
    REQUEST_REJECTED_CANNOT_CONNECT_TO_IDENTD = 92
    REQUEST_REJECTED_DIFFERENT_USERID = 93


class Socks5Reply(enum.IntEnum):
    SUCCEEDED = 0x0
    GENERAL_FAILURE = 0x1
    CONNECTION_NOT_ALLOWED = 0x2
    NETWORK_UNREACHABLE = 0x3
    HOST_UNREACHABLE = 0x4
    CONNECTION_REFUSED = 0x5
    TTL_EXPIRED = 0x6
    COMMAND_NOT_SUPPORTED = 0x7
    ADDRESS_TYPE_NOT_SUPPORTED = 0x8


class SocksTestRequestHandler(BaseRequestHandler):
    def __init__(self, *args, socks_info=None, **kwargs):
        self.socks_info = socks_info
        super().__init__(*args, **kwargs)


class SocksProxyHandler(BaseRequestHandler):
    def __init__(self, request_handler_class, socks_server_kwargs, *args, **kwargs):
        self.socks_kwargs = socks_server_kwargs or {}
        self.request_handler_class = request_handler_class
        super().__init__(*args, **kwargs)


class Socks5ProxyHandler(StreamRequestHandler, SocksProxyHandler):

    # SOCKS5 protocol https://tools.ietf.org/html/rfc1928
    # SOCKS5 username/password authentication https://tools.ietf.org/html/rfc1929

    def handle(self):
        sleep = self.socks_kwargs.get('sleep')
        if sleep:
            time.sleep(sleep)
        version, nmethods = self.connection.recv(2)
        assert version == SOCKS5_VERSION
        methods = list(self.connection.recv(nmethods))

        auth = self.socks_kwargs.get('auth')

        if auth is not None and Socks5Auth.AUTH_USER_PASS not in methods:
            self.connection.sendall(struct.pack('!BB',
SOCKS5_VERSION, Socks5Auth.AUTH_NO_ACCEPTABLE)) self.server.close_request(self.request) return elif Socks5Auth.AUTH_USER_PASS in methods: self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_USER_PASS)) _, user_len = struct.unpack('!BB', self.connection.recv(2)) username = self.connection.recv(user_len).decode() pass_len = ord(self.connection.recv(1)) password = self.connection.recv(pass_len).decode() if username == auth[0] and password == auth[1]: self.connection.sendall(struct.pack('!BB', SOCKS5_USER_AUTH_VERSION, SOCKS5_USER_AUTH_SUCCESS)) else: self.connection.sendall(struct.pack('!BB', SOCKS5_USER_AUTH_VERSION, SOCKS5_USER_AUTH_FAILURE)) self.server.close_request(self.request) return elif Socks5Auth.AUTH_NONE in methods: self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_NONE)) else: self.connection.sendall(struct.pack('!BB', SOCKS5_VERSION, Socks5Auth.AUTH_NO_ACCEPTABLE)) self.server.close_request(self.request) return version, command, _, address_type = struct.unpack('!BBBB', self.connection.recv(4)) socks_info = { 'version': version, 'auth_methods': methods, 'command': command, 'client_address': self.client_address, 'ipv4_address': None, 'domain_address': None, 'ipv6_address': None, } if address_type == Socks5AddressType.ATYP_IPV4: socks_info['ipv4_address'] = socket.inet_ntoa(self.connection.recv(4)) elif address_type == Socks5AddressType.ATYP_DOMAINNAME: socks_info['domain_address'] = self.connection.recv(ord(self.connection.recv(1))).decode() elif address_type == Socks5AddressType.ATYP_IPV6: socks_info['ipv6_address'] = socket.inet_ntop(socket.AF_INET6, self.connection.recv(16)) else: self.server.close_request(self.request) socks_info['port'] = struct.unpack('!H', self.connection.recv(2))[0] # dummy response, the returned IP is just a placeholder self.connection.sendall(struct.pack( '!BBBBIH', SOCKS5_VERSION, self.socks_kwargs.get('reply', Socks5Reply.SUCCEEDED), 0x0, 0x1, 0x7f000001, 40000)) self.request_handler_class(self.request, self.client_address, self.server, socks_info=socks_info) class Socks4ProxyHandler(StreamRequestHandler, SocksProxyHandler): # SOCKS4 protocol http://www.openssh.com/txt/socks4.protocol # SOCKS4A protocol http://www.openssh.com/txt/socks4a.protocol def _read_until_null(self): return b''.join(iter(functools.partial(self.connection.recv, 1), b'\x00')) def handle(self): sleep = self.socks_kwargs.get('sleep') if sleep: time.sleep(sleep) socks_info = { 'version': SOCKS4_VERSION, 'command': None, 'client_address': self.client_address, 'ipv4_address': None, 'port': None, 'domain_address': None, } version, command, dest_port, dest_ip = struct.unpack('!BBHI', self.connection.recv(8)) socks_info['port'] = dest_port socks_info['command'] = command if version != SOCKS4_VERSION: self.server.close_request(self.request) return use_remote_dns = False if 0x0 < dest_ip <= 0xFF: use_remote_dns = True else: socks_info['ipv4_address'] = socket.inet_ntoa(struct.pack('!I', dest_ip)) user_id = self._read_until_null().decode() if user_id != (self.socks_kwargs.get('user_id') or ''): self.connection.sendall(struct.pack( '!BBHI', SOCKS4_REPLY_VERSION, Socks4CD.REQUEST_REJECTED_DIFFERENT_USERID, 0x00, 0x00000000)) self.server.close_request(self.request) return if use_remote_dns: socks_info['domain_address'] = self._read_until_null().decode() # dummy response, the returned IP is just a placeholder self.connection.sendall( struct.pack( '!BBHI', SOCKS4_REPLY_VERSION, self.socks_kwargs.get('cd_reply', Socks4CD.REQUEST_GRANTED), 40000, 
0x7f000001)) self.request_handler_class(self.request, self.client_address, self.server, socks_info=socks_info) class IPv6ThreadingTCPServer(ThreadingTCPServer): address_family = socket.AF_INET6 class SocksHTTPTestRequestHandler(http.server.BaseHTTPRequestHandler, SocksTestRequestHandler): def do_GET(self): if self.path == '/socks_info': payload = json.dumps(self.socks_info.copy()) self.send_response(200) self.send_header('Content-Type', 'application/json; charset=utf-8') self.send_header('Content-Length', str(len(payload))) self.end_headers() self.wfile.write(payload.encode()) class SocksWebSocketTestRequestHandler(SocksTestRequestHandler): def handle(self): import websockets.sync.server protocol = websockets.ServerProtocol() connection = websockets.sync.server.ServerConnection(socket=self.request, protocol=protocol, close_timeout=0) connection.handshake() connection.send(json.dumps(self.socks_info)) connection.close() @contextlib.contextmanager def socks_server(socks_server_class, request_handler, bind_ip=None, **socks_server_kwargs): server = server_thread = None try: bind_address = bind_ip or '127.0.0.1' server_type = ThreadingTCPServer if '.' in bind_address else IPv6ThreadingTCPServer server = server_type( (bind_address, 0), functools.partial(socks_server_class, request_handler, socks_server_kwargs)) server_port = http_server_port(server) server_thread = threading.Thread(target=server.serve_forever) server_thread.daemon = True server_thread.start() if '.' not in bind_address: yield f'[{bind_address}]:{server_port}' else: yield f'{bind_address}:{server_port}' finally: server.shutdown() server.server_close() server_thread.join(2.0) class SocksProxyTestContext(abc.ABC): REQUEST_HANDLER_CLASS = None def socks_server(self, server_class, *args, **kwargs): return socks_server(server_class, self.REQUEST_HANDLER_CLASS, *args, **kwargs) @abc.abstractmethod def socks_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs) -> dict: """return a dict of socks_info""" class HTTPSocksTestProxyContext(SocksProxyTestContext): REQUEST_HANDLER_CLASS = SocksHTTPTestRequestHandler def socks_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs): request = Request(f'http://{target_domain or "127.0.0.1"}:{target_port or "40000"}/socks_info', **req_kwargs) handler.validate(request) return json.loads(handler.send(request).read().decode()) class WebSocketSocksTestProxyContext(SocksProxyTestContext): REQUEST_HANDLER_CLASS = SocksWebSocketTestRequestHandler def socks_info_request(self, handler, target_domain=None, target_port=None, **req_kwargs): request = Request(f'ws://{target_domain or "127.0.0.1"}:{target_port or "40000"}', **req_kwargs) handler.validate(request) ws = handler.send(request) ws.send('socks_info') socks_info = ws.recv() ws.close() return json.loads(socks_info) CTX_MAP = { 'http': HTTPSocksTestProxyContext, 'ws': WebSocketSocksTestProxyContext, } @pytest.fixture(scope='module') def ctx(request): return CTX_MAP[request.param]() @pytest.mark.parametrize( 'handler,ctx', [ ('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws'), ('CurlCFFI', 'http'), ], indirect=True) class TestSocks4Proxy: def test_socks4_no_auth(self, handler, ctx): with handler() as rh: with ctx.socks_server(Socks4ProxyHandler) as server_address: response = ctx.socks_info_request( rh, proxies={'all': f'socks4://{server_address}'}) assert response['version'] == 4 def test_socks4_auth(self, handler, ctx): with handler() as rh: with ctx.socks_server(Socks4ProxyHandler, 
user_id='user') as server_address: with pytest.raises(ProxyError): ctx.socks_info_request(rh, proxies={'all': f'socks4://{server_address}'}) response = ctx.socks_info_request( rh, proxies={'all': f'socks4://user:@{server_address}'}) assert response['version'] == 4 def test_socks4a_ipv4_target(self, handler, ctx): with ctx.socks_server(Socks4ProxyHandler) as server_address: with handler(proxies={'all': f'socks4a://{server_address}'}) as rh: response = ctx.socks_info_request(rh, target_domain='127.0.0.1') assert response['version'] == 4 assert (response['ipv4_address'] == '127.0.0.1') != (response['domain_address'] == '127.0.0.1') def test_socks4a_domain_target(self, handler, ctx): with ctx.socks_server(Socks4ProxyHandler) as server_address: with handler(proxies={'all': f'socks4a://{server_address}'}) as rh: response = ctx.socks_info_request(rh, target_domain='localhost') assert response['version'] == 4 assert response['ipv4_address'] is None assert response['domain_address'] == 'localhost' def test_ipv4_client_source_address(self, handler, ctx): with ctx.socks_server(Socks4ProxyHandler) as server_address: source_address = f'127.0.0.{random.randint(5, 255)}' verify_address_availability(source_address) with handler(proxies={'all': f'socks4://{server_address}'}, source_address=source_address) as rh: response = ctx.socks_info_request(rh) assert response['client_address'][0] == source_address assert response['version'] == 4 @pytest.mark.parametrize('reply_code', [ Socks4CD.REQUEST_REJECTED_OR_FAILED, Socks4CD.REQUEST_REJECTED_CANNOT_CONNECT_TO_IDENTD, Socks4CD.REQUEST_REJECTED_DIFFERENT_USERID, ]) def test_socks4_errors(self, handler, ctx, reply_code): with ctx.socks_server(Socks4ProxyHandler, cd_reply=reply_code) as server_address: with handler(proxies={'all': f'socks4://{server_address}'}) as rh: with pytest.raises(ProxyError): ctx.socks_info_request(rh) def test_ipv6_socks4_proxy(self, handler, ctx): with ctx.socks_server(Socks4ProxyHandler, bind_ip='::1') as server_address: with handler(proxies={'all': f'socks4://{server_address}'}) as rh: response = ctx.socks_info_request(rh, target_domain='127.0.0.1') assert response['client_address'][0] == '::1' assert response['ipv4_address'] == '127.0.0.1' assert response['version'] == 4 def test_timeout(self, handler, ctx): with ctx.socks_server(Socks4ProxyHandler, sleep=2) as server_address: with handler(proxies={'all': f'socks4://{server_address}'}, timeout=0.5) as rh: with pytest.raises(TransportError): ctx.socks_info_request(rh) @pytest.mark.parametrize( 'handler,ctx', [ ('Urllib', 'http'), ('Requests', 'http'), ('Websockets', 'ws'), ('CurlCFFI', 'http'), ], indirect=True) class TestSocks5Proxy: def test_socks5_no_auth(self, handler, ctx): with ctx.socks_server(Socks5ProxyHandler) as server_address: with handler(proxies={'all': f'socks5://{server_address}'}) as rh: response = ctx.socks_info_request(rh) assert response['auth_methods'] == [0x0] assert response['version'] == 5 def test_socks5_user_pass(self, handler, ctx): with ctx.socks_server(Socks5ProxyHandler, auth=('test', 'testpass')) as server_address: with handler() as rh: with pytest.raises(ProxyError): ctx.socks_info_request(rh, proxies={'all': f'socks5://{server_address}'}) response = ctx.socks_info_request( rh, proxies={'all': f'socks5://test:testpass@{server_address}'}) assert response['auth_methods'] == [Socks5Auth.AUTH_NONE, Socks5Auth.AUTH_USER_PASS] assert response['version'] == 5 def test_socks5_ipv4_target(self, handler, ctx): with ctx.socks_server(Socks5ProxyHandler) as 
server_address: with handler(proxies={'all': f'socks5://{server_address}'}) as rh: response = ctx.socks_info_request(rh, target_domain='127.0.0.1') assert response['ipv4_address'] == '127.0.0.1' assert response['version'] == 5 def test_socks5_domain_target(self, handler, ctx): with ctx.socks_server(Socks5ProxyHandler) as server_address: with handler(proxies={'all': f'socks5://{server_address}'}) as rh: response = ctx.socks_info_request(rh, target_domain='localhost') assert (response['ipv4_address'] == '127.0.0.1') != (response['ipv6_address'] == '::1') assert response['version'] == 5 def test_socks5h_domain_target(self, handler, ctx): with ctx.socks_server(Socks5ProxyHandler) as server_address: with handler(proxies={'all': f'socks5h://{server_address}'}) as rh: response = ctx.socks_info_request(rh, target_domain='localhost') assert response['ipv4_address'] is None assert response['domain_address'] == 'localhost' assert response['version'] == 5 def test_socks5h_ip_target(self, handler, ctx): with ctx.socks_server(Socks5ProxyHandler) as server_address: with handler(proxies={'all': f'socks5h://{server_address}'}) as rh: response = ctx.socks_info_request(rh, target_domain='127.0.0.1') assert response['ipv4_address'] == '127.0.0.1' assert response['domain_address'] is None assert response['version'] == 5 def test_socks5_ipv6_destination(self, handler, ctx): with ctx.socks_server(Socks5ProxyHandler) as server_address: with handler(proxies={'all': f'socks5://{server_address}'}) as rh: response = ctx.socks_info_request(rh, target_domain='[::1]') assert response['ipv6_address'] == '::1' assert response['version'] == 5 def test_ipv6_socks5_proxy(self, handler, ctx): with ctx.socks_server(Socks5ProxyHandler, bind_ip='::1') as server_address: with handler(proxies={'all': f'socks5://{server_address}'}) as rh: response = ctx.socks_info_request(rh, target_domain='127.0.0.1') assert response['client_address'][0] == '::1' assert response['ipv4_address'] == '127.0.0.1' assert response['version'] == 5 # XXX: is there any feasible way of testing IPv6 source addresses? # Same would go for non-proxy source_address test... 
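    # The IPv4 test below binds the client to a random loopback alias (127.0.0.x);
    # verify_address_availability() is expected to skip the test on systems where
    # that alias cannot be bound.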
    def test_ipv4_client_source_address(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler) as server_address:
            source_address = f'127.0.0.{random.randint(5, 255)}'
            verify_address_availability(source_address)
            with handler(proxies={'all': f'socks5://{server_address}'},
                         source_address=source_address) as rh:
                response = ctx.socks_info_request(rh)
                assert response['client_address'][0] == source_address
                assert response['version'] == 5

    @pytest.mark.parametrize('reply_code', [
        Socks5Reply.GENERAL_FAILURE,
        Socks5Reply.CONNECTION_NOT_ALLOWED,
        Socks5Reply.NETWORK_UNREACHABLE,
        Socks5Reply.HOST_UNREACHABLE,
        Socks5Reply.CONNECTION_REFUSED,
        Socks5Reply.TTL_EXPIRED,
        Socks5Reply.COMMAND_NOT_SUPPORTED,
        Socks5Reply.ADDRESS_TYPE_NOT_SUPPORTED,
    ])
    def test_socks5_errors(self, handler, ctx, reply_code):
        with ctx.socks_server(Socks5ProxyHandler, reply=reply_code) as server_address:
            with handler(proxies={'all': f'socks5://{server_address}'}) as rh:
                with pytest.raises(ProxyError):
                    ctx.socks_info_request(rh)

    def test_timeout(self, handler, ctx):
        with ctx.socks_server(Socks5ProxyHandler, sleep=2) as server_address:
            with handler(proxies={'all': f'socks5://{server_address}'}, timeout=1) as rh:
                with pytest.raises(TransportError):
                    ctx.socks_info_request(rh)


if __name__ == '__main__':
    unittest.main()
yt-dlp-2024.09.27/test/test_subtitles.py
#!/usr/bin/env python3
# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import FakeYDL, is_download_test, md5
from yt_dlp.extractor import (
    NPOIE,
    NRKTVIE,
    PBSIE,
    CeskaTelevizeIE,
    ComedyCentralIE,
    DailymotionIE,
    DemocracynowIE,
    LyndaIE,
    RaiPlayIE,
    RTVEALaCartaIE,
    TedTalkIE,
    ThePlatformFeedIE,
    ThePlatformIE,
    VikiIE,
    VimeoIE,
    WallaIE,
    YoutubeIE,
)


@is_download_test
class BaseTestSubtitles(unittest.TestCase):
    url = None
    IE = None

    def setUp(self):
        self.DL = FakeYDL()
        self.ie = self.IE()
        self.DL.add_info_extractor(self.ie)
        if not self.IE.working():
            print(f'Skipping: {self.IE.ie_key()} marked as not _WORKING')
            self.skipTest('IE marked as not _WORKING')

    def getInfoDict(self):
        return self.DL.extract_info(self.url, download=False)

    def getSubtitles(self):
        info_dict = self.getInfoDict()
        subtitles = info_dict['requested_subtitles']
        if not subtitles:
            return subtitles
        for sub_info in subtitles.values():
            if sub_info.get('data') is None:
                uf = self.DL.urlopen(sub_info['url'])
                sub_info['data'] = uf.read().decode()
        return {l: sub_info['data'] for l, sub_info in subtitles.items()}


@is_download_test
class TestYoutubeSubtitles(BaseTestSubtitles):
    # Available subtitles for QRS8MkLhQmM:
    # Language formats
    # ru       vtt, ttml, srv3, srv2, srv1, json3
    # fr       vtt, ttml, srv3, srv2, srv1, json3
    # en       vtt, ttml, srv3, srv2, srv1, json3
    # nl
vtt, ttml, srv3, srv2, srv1, json3 # de vtt, ttml, srv3, srv2, srv1, json3 # ko vtt, ttml, srv3, srv2, srv1, json3 # it vtt, ttml, srv3, srv2, srv1, json3 # zh-Hant vtt, ttml, srv3, srv2, srv1, json3 # hi vtt, ttml, srv3, srv2, srv1, json3 # pt-BR vtt, ttml, srv3, srv2, srv1, json3 # es-MX vtt, ttml, srv3, srv2, srv1, json3 # ja vtt, ttml, srv3, srv2, srv1, json3 # pl vtt, ttml, srv3, srv2, srv1, json3 url = 'QRS8MkLhQmM' IE = YoutubeIE def test_youtube_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(len(subtitles.keys()), 13) self.assertEqual(md5(subtitles['en']), 'ae1bd34126571a77aabd4d276b28044d') self.assertEqual(md5(subtitles['it']), '0e0b667ba68411d88fd1c5f4f4eab2f9') for lang in ['fr', 'de']: self.assertTrue(subtitles.get(lang) is not None, f'Subtitles for \'{lang}\' not extracted') def _test_subtitles_format(self, fmt, md5_hash, lang='en'): self.DL.params['writesubtitles'] = True self.DL.params['subtitlesformat'] = fmt subtitles = self.getSubtitles() self.assertEqual(md5(subtitles[lang]), md5_hash) def test_youtube_subtitles_ttml_format(self): self._test_subtitles_format('ttml', 'c97ddf1217390906fa9fbd34901f3da2') def test_youtube_subtitles_vtt_format(self): self._test_subtitles_format('vtt', 'ae1bd34126571a77aabd4d276b28044d') def test_youtube_subtitles_json3_format(self): self._test_subtitles_format('json3', '688dd1ce0981683867e7fe6fde2a224b') def _test_automatic_captions(self, url, lang): self.url = url self.DL.params['writeautomaticsub'] = True self.DL.params['subtitleslangs'] = [lang] subtitles = self.getSubtitles() self.assertTrue(subtitles[lang] is not None) def test_youtube_automatic_captions(self): # Available automatic captions for 8YoUxe5ncPo: # Language formats (all in vtt, ttml, srv3, srv2, srv1, json3) # gu, zh-Hans, zh-Hant, gd, ga, gl, lb, la, lo, tt, tr, # lv, lt, tk, th, tg, te, fil, haw, yi, ceb, yo, de, da, # el, eo, en, eu, et, es, ru, rw, ro, bn, be, bg, uk, jv, # bs, ja, or, xh, co, ca, cy, cs, ps, pt, pa, vi, pl, hy, # hr, ht, hu, hmn, hi, ha, mg, uz, ml, mn, mi, mk, ur, # mt, ms, mr, ug, ta, my, af, sw, is, am, # *it*, iw, sv, ar, # su, zu, az, id, ig, nl, no, ne, ny, fr, ku, fy, fa, fi, # ka, kk, sr, sq, ko, kn, km, st, sk, si, so, sn, sm, sl, # ky, sd # ... self._test_automatic_captions('8YoUxe5ncPo', 'it') @unittest.skip('Video unavailable') def test_youtube_translated_subtitles(self): # This video has a subtitles track, which can be translated (#4555) self._test_automatic_captions('Ky9eprVWzlI', 'it') def test_youtube_nosubtitles(self): self.DL.expect_warning('video doesn\'t have subtitles') # Available automatic captions for 8YoUxe5ncPo: # ... 
# 8YoUxe5ncPo has no subtitles self.url = '8YoUxe5ncPo' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertFalse(subtitles) @is_download_test class TestDailymotionSubtitles(BaseTestSubtitles): url = 'http://www.dailymotion.com/video/xczg00' IE = DailymotionIE def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertTrue(len(subtitles.keys()) >= 6) self.assertEqual(md5(subtitles['en']), '976553874490cba125086bbfea3ff76f') self.assertEqual(md5(subtitles['fr']), '594564ec7d588942e384e920e5341792') for lang in ['es', 'fr', 'de']: self.assertTrue(subtitles.get(lang) is not None, f'Subtitles for \'{lang}\' not extracted') def test_nosubtitles(self): self.DL.expect_warning('video doesn\'t have subtitles') self.url = 'http://www.dailymotion.com/video/x12u166_le-zapping-tele-star-du-08-aout-2013_tv' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertFalse(subtitles) @is_download_test @unittest.skip('IE broken') class TestTedSubtitles(BaseTestSubtitles): url = 'http://www.ted.com/talks/dan_dennett_on_our_consciousness.html' IE = TedTalkIE def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertTrue(len(subtitles.keys()) >= 28) self.assertEqual(md5(subtitles['en']), '4262c1665ff928a2dada178f62cb8d14') self.assertEqual(md5(subtitles['fr']), '66a63f7f42c97a50f8c0e90bc7797bb5') for lang in ['es', 'fr', 'de']: self.assertTrue(subtitles.get(lang) is not None, f'Subtitles for \'{lang}\' not extracted') @is_download_test class TestVimeoSubtitles(BaseTestSubtitles): url = 'http://vimeo.com/76979871' IE = VimeoIE def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), {'de', 'en', 'es', 'fr'}) self.assertEqual(md5(subtitles['en']), '386cbc9320b94e25cb364b97935e5dd1') self.assertEqual(md5(subtitles['fr']), 'c9b69eef35bc6641c0d4da8a04f9dfac') def test_nosubtitles(self): self.DL.expect_warning('video doesn\'t have subtitles') self.url = 'http://vimeo.com/68093876' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertFalse(subtitles) @is_download_test @unittest.skip('IE broken') class TestWallaSubtitles(BaseTestSubtitles): url = 'http://vod.walla.co.il/movie/2705958/the-yes-men' IE = WallaIE def test_allsubtitles(self): self.DL.expect_warning('Automatic Captions not supported by this server') self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), {'heb'}) self.assertEqual(md5(subtitles['heb']), 'e758c5d7cb982f6bef14f377ec7a3920') def test_nosubtitles(self): self.DL.expect_warning('video doesn\'t have subtitles') self.url = 'http://vod.walla.co.il/movie/2642630/one-direction-all-for-one' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertFalse(subtitles) @is_download_test @unittest.skip('IE broken') class TestCeskaTelevizeSubtitles(BaseTestSubtitles): url = 'http://www.ceskatelevize.cz/ivysilani/10600540290-u6-uzasny-svet-techniky' IE = CeskaTelevizeIE def test_allsubtitles(self): self.DL.expect_warning('Automatic Captions not supported by this 
server') self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), {'cs'}) self.assertTrue(len(subtitles['cs']) > 20000) def test_nosubtitles(self): self.DL.expect_warning('video doesn\'t have subtitles') self.url = 'http://www.ceskatelevize.cz/ivysilani/ivysilani/10441294653-hyde-park-civilizace/214411058091220' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertFalse(subtitles) @is_download_test @unittest.skip('IE broken') class TestLyndaSubtitles(BaseTestSubtitles): url = 'http://www.lynda.com/Bootstrap-tutorials/Using-exercise-files/110885/114408-4.html' IE = LyndaIE def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(md5(subtitles['en']), '09bbe67222259bed60deaa26997d73a7') @is_download_test @unittest.skip('IE broken') class TestNPOSubtitles(BaseTestSubtitles): url = 'http://www.npo.nl/nos-journaal/28-08-2014/POW_00722860' IE = NPOIE def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), {'nl'}) self.assertEqual(md5(subtitles['nl']), 'fc6435027572b63fb4ab143abd5ad3f4') @is_download_test @unittest.skip('IE broken') class TestMTVSubtitles(BaseTestSubtitles): url = 'http://www.cc.com/video-clips/p63lk0/adam-devine-s-house-party-chasing-white-swans' IE = ComedyCentralIE def getInfoDict(self): return super().getInfoDict()['entries'][0] def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(md5(subtitles['en']), '78206b8d8a0cfa9da64dc026eea48961') @is_download_test class TestNRKSubtitles(BaseTestSubtitles): url = 'http://tv.nrk.no/serie/ikke-gjoer-dette-hjemme/DMPV73000411/sesong-2/episode-1' IE = NRKTVIE def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), {'nb-ttv'}) self.assertEqual(md5(subtitles['nb-ttv']), '67e06ff02d0deaf975e68f6cb8f6a149') @is_download_test class TestRaiPlaySubtitles(BaseTestSubtitles): IE = RaiPlayIE def test_subtitles_key(self): self.url = 'http://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), {'it'}) self.assertEqual(md5(subtitles['it']), 'b1d90a98755126b61e667567a1f6680a') def test_subtitles_array_key(self): self.url = 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), {'it'}) self.assertEqual(md5(subtitles['it']), '4b3264186fbb103508abe5311cfcb9cd') @is_download_test @unittest.skip('IE broken - DRM only') class TestVikiSubtitles(BaseTestSubtitles): url = 'http://www.viki.com/videos/1060846v-punch-episode-18' IE = VikiIE def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() 
self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(md5(subtitles['en']), '53cb083a5914b2d84ef1ab67b880d18a') @is_download_test class TestThePlatformSubtitles(BaseTestSubtitles): # from http://www.3playmedia.com/services-features/tools/integrations/theplatform/ # (see http://theplatform.com/about/partners/type/subtitles-closed-captioning/) url = 'theplatform:JFUjUE1_ehvq' IE = ThePlatformIE def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(md5(subtitles['en']), '97e7670cbae3c4d26ae8bcc7fdd78d4b') @is_download_test @unittest.skip('IE broken') class TestThePlatformFeedSubtitles(BaseTestSubtitles): url = 'http://feed.theplatform.com/f/7wvmTC/msnbc_video-p-test?form=json&pretty=true&range=-40&byGuid=n_hardball_5biden_140207' IE = ThePlatformFeedIE def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(md5(subtitles['en']), '48649a22e82b2da21c9a67a395eedade') @is_download_test class TestRtveSubtitles(BaseTestSubtitles): url = 'http://www.rtve.es/alacarta/videos/los-misterios-de-laura/misterios-laura-capitulo-32-misterio-del-numero-17-2-parte/2428621/' IE = RTVEALaCartaIE def test_allsubtitles(self): print('Skipping, only available from Spain') return self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), {'es'}) self.assertEqual(md5(subtitles['es']), '69e70cae2d40574fb7316f31d6eb7fca') @is_download_test class TestDemocracynowSubtitles(BaseTestSubtitles): url = 'http://www.democracynow.org/shows/2015/7/3' IE = DemocracynowIE def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045') def test_subtitles_in_page(self): self.url = 'http://www.democracynow.org/2015/7/3/this_flag_comes_down_today_bree' self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), {'en'}) self.assertEqual(md5(subtitles['en']), 'a3cc4c0b5eadd74d9974f1c1f5101045') @is_download_test class TestPBSSubtitles(BaseTestSubtitles): url = 'https://www.pbs.org/video/how-fantasy-reflects-our-world-picecq/' IE = PBSIE def test_allsubtitles(self): self.DL.params['writesubtitles'] = True self.DL.params['allsubtitles'] = True subtitles = self.getSubtitles() self.assertEqual(set(subtitles.keys()), {'en'}) def test_subtitles_dfxp_format(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitlesformat'] = 'dfxp' subtitles = self.getSubtitles() self.assertIn(md5(subtitles['en']), ['643b034254cdc3768ff1e750b6b5873b']) def test_subtitles_vtt_format(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitlesformat'] = 'vtt' subtitles = self.getSubtitles() self.assertIn( md5(subtitles['en']), ['937a05711555b165d4c55a9667017045', 'f49ea998d6824d94959c8152a368ff73']) def test_subtitles_srt_format(self): self.DL.params['writesubtitles'] = True self.DL.params['subtitlesformat'] = 'srt' subtitles = self.getSubtitles() self.assertIn(md5(subtitles['en']), ['2082c21b43759d9bf172931b2f2ca371']) def test_subtitles_sami_format(self): 
        self.DL.params['writesubtitles'] = True
        self.DL.params['subtitlesformat'] = 'sami'
        subtitles = self.getSubtitles()
        self.assertIn(md5(subtitles['en']), ['4256b16ac7da6a6780fafd04294e85cd'])


if __name__ == '__main__':
    unittest.main()
yt-dlp-2024.09.27/test/test_traversal.py
import http.cookies
import re
import xml.etree.ElementTree

import pytest

from yt_dlp.utils import dict_get, int_or_none, str_or_none
from yt_dlp.utils.traversal import traverse_obj

_TEST_DATA = {
    100: 100,
    1.2: 1.2,
    'str': 'str',
    'None': None,
    '...': ...,
    'urls': [
        {'index': 0, 'url': 'https://www.example.com/0'},
        {'index': 1, 'url': 'https://www.example.com/1'},
    ],
    'data': (
        {'index': 2},
        {'index': 3},
    ),
    'dict': {},
}


class TestTraversal:
    def test_traversal_base(self):
        assert traverse_obj(_TEST_DATA, ('str',)) == 'str', \
            'allow tuple path'
        assert traverse_obj(_TEST_DATA, ['str']) == 'str', \
            'allow list path'
        assert traverse_obj(_TEST_DATA, (value for value in ('str',))) == 'str', \
            'allow iterable path'
        assert traverse_obj(_TEST_DATA, 'str') == 'str', \
            'single items should be treated as a path'
        assert traverse_obj(_TEST_DATA, 100) == 100, \
            'allow int path'
        assert traverse_obj(_TEST_DATA, 1.2) == 1.2, \
            'allow float path'
        assert traverse_obj(_TEST_DATA, None) == _TEST_DATA, \
            '`None` should not perform any modification'

    def test_traversal_ellipsis(self):
        assert traverse_obj(_TEST_DATA, ...) == [x for x in _TEST_DATA.values() if x not in (None, {})], \
            '`...` should give all non discarded values'
        assert traverse_obj(_TEST_DATA, ('urls', 0, ...)) == list(_TEST_DATA['urls'][0].values()), \
            '`...` selection for dicts should select all values'
        assert traverse_obj(_TEST_DATA, (..., ..., 'url')) == ['https://www.example.com/0', 'https://www.example.com/1'], \
            'nested `...` queries should work'
        assert traverse_obj(_TEST_DATA, (..., ..., 'index')) == list(range(4)), \
            '`...` query result should be flattened'
        assert traverse_obj(iter(range(4)), ...) == list(range(4)), \
            '`...` should accept iterables'

    def test_traversal_function(self):
        filter_func = lambda x, y: x == 'urls' and isinstance(y, list)
        assert traverse_obj(_TEST_DATA, filter_func) == [_TEST_DATA['urls']], \
            'function as query key should perform a filter based on (key, value)'
        assert traverse_obj(_TEST_DATA, lambda _, x: isinstance(x[0], str)) == ['str'], \
            'exceptions in the query function should be caught'
        assert traverse_obj(iter(range(4)), lambda _, x: x % 2 == 0) == [0, 2], \
            'function key should accept iterables'
        # Wrong function signature should raise (debug mode)
        with pytest.raises(Exception):
            traverse_obj(_TEST_DATA, lambda a: ...)
        with pytest.raises(Exception):
            traverse_obj(_TEST_DATA, lambda a, b, c: ...)

    def test_traversal_set(self):
        # transformation/type, like `expected_type`
        assert traverse_obj(_TEST_DATA, (..., {str.upper})) == ['STR'], \
            'Function in set should be a transformation'
        assert traverse_obj(_TEST_DATA, (..., {str})) == ['str'], \
            'Type in set should be a type filter'
        assert traverse_obj(_TEST_DATA, (..., {str, int})) == [100, 'str'], \
            'Multiple types in set should be a type filter'
        assert traverse_obj(_TEST_DATA, {dict}) == _TEST_DATA, \
            'A single set should be wrapped into a path'
        assert traverse_obj(_TEST_DATA, (..., {str.upper})) == ['STR'], \
            'Transformation function should not raise'
        expected = [x for x in map(str_or_none, _TEST_DATA.values()) if x is not None]
        assert traverse_obj(_TEST_DATA, (..., {str_or_none})) == expected, \
            'Function in set should be a transformation'
        assert traverse_obj(_TEST_DATA, ('fail', {lambda _: 'const'})) == 'const', \
            'Function in set should always be called'
        # Sets with length < 1 or > 1 not including only types should raise
        with pytest.raises(Exception):
            traverse_obj(_TEST_DATA, set())
        with pytest.raises(Exception):
            traverse_obj(_TEST_DATA, {str.upper, str})

    def test_traversal_slice(self):
        _SLICE_DATA = [0, 1, 2, 3, 4]

        assert traverse_obj(_TEST_DATA, ('dict', slice(1))) is None, \
            'slice on a dictionary should not throw'
        assert traverse_obj(_SLICE_DATA, slice(1)) == _SLICE_DATA[:1], \
            'slice key should apply slice to sequence'
        assert traverse_obj(_SLICE_DATA, slice(1, 2)) == _SLICE_DATA[1:2], \
            'slice key should apply slice to sequence'
        assert traverse_obj(_SLICE_DATA, slice(1, 4, 2)) == _SLICE_DATA[1:4:2], \
            'slice key should apply slice to sequence'

    def test_traversal_alternatives(self):
        assert traverse_obj(_TEST_DATA, 'fail', 'str') == 'str', \
            'multiple `paths` should be treated as alternative paths'
        assert traverse_obj(_TEST_DATA, 'str', 100) == 'str', \
            'alternatives should exit early'
        assert traverse_obj(_TEST_DATA, 'fail', 'fail') is None, \
            'alternatives should return `default` if exhausted'
        assert traverse_obj(_TEST_DATA, (..., 'fail'), 100) == 100, \
            'alternatives should track their own branching return'
        assert traverse_obj(_TEST_DATA, ('dict', ...), ('data', ...)) == list(_TEST_DATA['data']), \
            'alternatives on empty objects should search further'

    def test_traversal_branching_nesting(self):
        assert traverse_obj(_TEST_DATA, ('urls', (3, 0), 'url')) == ['https://www.example.com/0'], \
            'tuple as key should be treated as branches'
        assert traverse_obj(_TEST_DATA, ('urls', [3, 0], 'url')) == ['https://www.example.com/0'], \
            'list as key should be treated as branches'
        assert traverse_obj(_TEST_DATA, ('urls', ((1, 'fail'), (0, 'url')))) == ['https://www.example.com/0'], \
            'double nesting in path should be treated as paths'
        assert traverse_obj(['0', [1, 2]], [(0, 1), 0]) == [1], \
            'do not fail early on branching'
        expected = ['https://www.example.com/0', 'https://www.example.com/1']
        assert traverse_obj(_TEST_DATA, ('urls', ((0, ('fail', 'url')), (1, 'url')))) == expected, \
            'triple nesting in path should be treated as branches'
        assert traverse_obj(_TEST_DATA, ('urls', ('fail', (..., 'url')))) == expected, \
            'ellipsis as branch path start gets flattened'

    def test_traversal_dict(self):
        assert traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}) == {0: 100, 1: 1.2}, \
            'dict key should result in a dict with the same keys'
        expected = {0: 'https://www.example.com/0'}
        assert traverse_obj(_TEST_DATA, {0: ('urls', 0, 'url')}) == expected, \
            'dict key should allow paths'
        expected = {0: ['https://www.example.com/0']}
        assert traverse_obj(_TEST_DATA, {0: ('urls', (3, 0), 'url')}) == expected, \
            'tuple in dict path should be treated as branches'
        assert traverse_obj(_TEST_DATA, {0: ('urls', ((1, 'fail'), (0, 'url')))}) == expected, \
            'double nesting in dict path should be treated as paths'
        expected = {0: ['https://www.example.com/1', 'https://www.example.com/0']}
        assert traverse_obj(_TEST_DATA, {0: ('urls', ((1, ('fail', 'url')), (0, 'url')))}) == expected, \
            'triple nesting in dict path should be treated as branches'
        assert traverse_obj(_TEST_DATA, {0: 'fail'}) == {}, \
            'remove `None` values when top level dict key fails'
        assert traverse_obj(_TEST_DATA, {0: 'fail'}, default=...) == {0: ...}, \
            'use `default` if key fails and `default`'
        assert traverse_obj(_TEST_DATA, {0: 'dict'}) == {}, \
            'remove empty values when dict key'
        assert traverse_obj(_TEST_DATA, {0: 'dict'}, default=...) == {0: ...}, \
            'use `default` when dict key and `default`'
        assert traverse_obj(_TEST_DATA, {0: {0: 'fail'}}) == {}, \
            'remove empty values when nested dict key fails'
        assert traverse_obj(None, {0: 'fail'}) == {}, \
            'default to dict if pruned'
        assert traverse_obj(None, {0: 'fail'}, default=...) == {0: ...}, \
            'default to dict if pruned and default is given'
        assert traverse_obj(_TEST_DATA, {0: {0: 'fail'}}, default=...) == {0: {0: ...}}, \
            'use nested `default` when nested dict key fails and `default`'
        assert traverse_obj(_TEST_DATA, {0: ('dict', ...)}) == {}, \
            'remove key if branch in dict key not successful'

    def test_traversal_default(self):
        _DEFAULT_DATA = {'None': None, 'int': 0, 'list': []}

        assert traverse_obj(_DEFAULT_DATA, 'fail') is None, \
            'default value should be `None`'
        assert traverse_obj(_DEFAULT_DATA, 'fail', 'fail', default=...) == ..., \
            'chained fails should result in default'
        assert traverse_obj(_DEFAULT_DATA, 'None', 'int') == 0, \
            'should not short circuit on `None`'
        assert traverse_obj(_DEFAULT_DATA, 'fail', default=1) == 1, \
            'invalid dict key should result in `default`'
        assert traverse_obj(_DEFAULT_DATA, 'None', default=1) == 1, \
            '`None` is a deliberate sentinel and should become `default`'
        assert traverse_obj(_DEFAULT_DATA, ('list', 10)) is None, \
            '`IndexError` should result in `default`'
        assert traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=1) == 1, \
            'if branched but not successful return `default` if defined, not `[]`'
        assert traverse_obj(_DEFAULT_DATA, (..., 'fail'), default=None) is None, \
            'if branched but not successful return `default` even if `default` is `None`'
        assert traverse_obj(_DEFAULT_DATA, (..., 'fail')) == [], \
            'if branched but not successful return `[]`, not `default`'
        assert traverse_obj(_DEFAULT_DATA, ('list', ...)) == [], \
            'if branched but object is empty return `[]`, not `default`'
        assert traverse_obj(None, ...) == [], \
            'if branched but object is `None` return `[]`, not `default`'
        assert traverse_obj({0: None}, (0, ...)) == [], \
            'if branched but state is `None` return `[]`, not `default`'
== [], \ 'if branched but object is `None` return `[]`, not `default`' assert traverse_obj({0: None}, (0, ...)) == [], \ 'if branched but state is `None` return `[]`, not `default`' @pytest.mark.parametrize('path', [ ('fail', ...), (..., 'fail'), 100 * ('fail',) + (...,), (...,) + 100 * ('fail',), ]) def test_traversal_branching(self, path): assert traverse_obj({}, path) == [], \ 'if branched but state is `None`, return `[]` (not `default`)' assert traverse_obj({}, 'fail', path) == [], \ 'if branching in last alternative and previous did not match, return `[]` (not `default`)' assert traverse_obj({0: 'x'}, 0, path) == 'x', \ 'if branching in last alternative and previous did match, return single value' assert traverse_obj({0: 'x'}, path, 0) == 'x', \ 'if branching in first alternative and non-branching path does match, return single value' assert traverse_obj({}, path, 'fail') is None, \ 'if branching in first alternative and non-branching path does not match, return `default`' def test_traversal_expected_type(self): _EXPECTED_TYPE_DATA = {'str': 'str', 'int': 0} assert traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=str) == 'str', \ 'accept matching `expected_type` type' assert traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=int) is None, \ 'reject non matching `expected_type` type' assert traverse_obj(_EXPECTED_TYPE_DATA, 'int', expected_type=lambda x: str(x)) == '0', \ 'transform type using type function' assert traverse_obj(_EXPECTED_TYPE_DATA, 'str', expected_type=lambda _: 1 / 0) is None, \ 'wrap expected_type fuction in try_call' assert traverse_obj(_EXPECTED_TYPE_DATA, ..., expected_type=str) == ['str'], \ 'eliminate items that expected_type fails on' assert traverse_obj(_TEST_DATA, {0: 100, 1: 1.2}, expected_type=int) == {0: 100}, \ 'type as expected_type should filter dict values' assert traverse_obj(_TEST_DATA, {0: 100, 1: 1.2, 2: 'None'}, expected_type=str_or_none) == {0: '100', 1: '1.2'}, \ 'function as expected_type should transform dict values' assert traverse_obj(_TEST_DATA, ({0: 1.2}, 0, {int_or_none}), expected_type=int) == 1, \ 'expected_type should not filter non final dict values' assert traverse_obj(_TEST_DATA, {0: {0: 100, 1: 'str'}}, expected_type=int) == {0: {0: 100}}, \ 'expected_type should transform deep dict values' assert traverse_obj(_TEST_DATA, [({0: '...'}, {0: '...'})], expected_type=type(...)) == [{0: ...}, {0: ...}], \ 'expected_type should transform branched dict values' assert traverse_obj({1: {3: 4}}, [(1, 2), 3], expected_type=int) == [4], \ 'expected_type regression for type matching in tuple branching' assert traverse_obj(_TEST_DATA, ['data', ...], expected_type=int) == [], \ 'expected_type regression for type matching in dict result' def test_traversal_get_all(self): _GET_ALL_DATA = {'key': [0, 1, 2]} assert traverse_obj(_GET_ALL_DATA, ('key', ...), get_all=False) == 0, \ 'if not `get_all`, return only first matching value' assert traverse_obj(_GET_ALL_DATA, ..., get_all=False) == [0, 1, 2], \ 'do not overflatten if not `get_all`' def test_traversal_casesense(self): _CASESENSE_DATA = { 'KeY': 'value0', 0: { 'KeY': 'value1', 0: {'KeY': 'value2'}, }, } assert traverse_obj(_CASESENSE_DATA, 'key') is None, \ 'dict keys should be case sensitive unless `casesense`' assert traverse_obj(_CASESENSE_DATA, 'keY', casesense=False) == 'value0', \ 'allow non matching key case if `casesense`' assert traverse_obj(_CASESENSE_DATA, [0, ('keY',)], casesense=False) == ['value1'], \ 'allow non matching key case in branch if `casesense`' assert 
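
    # Informal summary of `traverse_string`, verified by the assertions below:
    # strings are opaque by default and only become indexable/sliceable when
    # traverse_string=True is passed:
    #   traverse_obj({'s': 'abc'}, ('s', 0))                        # -> None
    #   traverse_obj({'s': 'abc'}, ('s', 0), traverse_string=True)  # -> 'a'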
    def test_traversal_traverse_string(self):
        _TRAVERSE_STRING_DATA = {'str': 'str', 1.2: 1.2}

        assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0)) is None, \
            'do not traverse into string if not `traverse_string`'
        assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', 0), traverse_string=True) == 's', \
            'traverse into string if `traverse_string`'
        assert traverse_obj(_TRAVERSE_STRING_DATA, (1.2, 1), traverse_string=True) == '.', \
            'traverse into converted data if `traverse_string`'
        assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', ...), traverse_string=True) == 'str', \
            '`...` should result in string (same value) if `traverse_string`'
        assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', slice(0, None, 2)), traverse_string=True) == 'sr', \
            '`slice` should result in string if `traverse_string`'
        assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', lambda i, v: i or v == 's'), traverse_string=True) == 'str', \
            'function should result in string if `traverse_string`'
        assert traverse_obj(_TRAVERSE_STRING_DATA, ('str', (0, 2)), traverse_string=True) == ['s', 'r'], \
            'branching should result in list if `traverse_string`'
        assert traverse_obj({}, (0, ...), traverse_string=True) == [], \
            'branching should result in list if `traverse_string`'
        assert traverse_obj({}, (0, lambda x, y: True), traverse_string=True) == [], \
            'branching should result in list if `traverse_string`'
        assert traverse_obj({}, (0, slice(1)), traverse_string=True) == [], \
            'branching should result in list if `traverse_string`'

    def test_traversal_re(self):
        mobj = re.fullmatch(r'0(12)(?P<group>3)(4)?', '0123')
        assert traverse_obj(mobj, ...) == [x for x in mobj.groups() if x is not None], \
            '`...` on a `re.Match` should give its `groups()`'
        assert traverse_obj(mobj, lambda k, _: k in (0, 2)) == ['0123', '3'], \
            'function on a `re.Match` should give groupno, value starting at 0'
        assert traverse_obj(mobj, 'group') == '3', \
            'str key on a `re.Match` should give group with that name'
        assert traverse_obj(mobj, 2) == '3', \
            'int key on a `re.Match` should give group with that name'
        assert traverse_obj(mobj, 'gRoUp', casesense=False) == '3', \
            'str key on a `re.Match` should respect casesense'
        assert traverse_obj(mobj, 'fail') is None, \
            'failing str key on a `re.Match` should return `default`'
        assert traverse_obj(mobj, 'gRoUpS', casesense=False) is None, \
            'failing str key on a `re.Match` should return `default`'
        assert traverse_obj(mobj, 8) is None, \
            'failing int key on a `re.Match` should return `default`'
        assert traverse_obj(mobj, lambda k, _: k in (0, 'group')) == ['0123', '3'], \
            'function on a `re.Match` should give group name as well'

    def test_traversal_xml_etree(self):
        etree = xml.etree.ElementTree.fromstring('''<?xml version="1.0"?>
        <data>
            <country name="Liechtenstein">
                <rank>1</rank>
                <year>2008</year>
                <gdppc>141100</gdppc>
                <neighbor name="Austria" direction="E"/>
                <neighbor name="Switzerland" direction="W"/>
            </country>
            <country name="Singapore">
                <rank>4</rank>
                <year>2011</year>
                <gdppc>59900</gdppc>
                <neighbor name="Malaysia" direction="N"/>
            </country>
            <country name="Panama">
                <rank>68</rank>
                <year>2011</year>
                <gdppc>13600</gdppc>
                <neighbor name="Costa Rica" direction="W"/>
                <neighbor name="Colombia" direction="E"/>
            </country>
        </data>''')
        assert traverse_obj(etree, '') == etree, \
            'empty str key should return the element itself'
        assert traverse_obj(etree, 'country') == list(etree), \
            'str key should lead all children with that tag name'
        assert traverse_obj(etree, ...) == list(etree), \
            '`...` as key should return all children'
        assert traverse_obj(etree, lambda _, x: x[0].text == '4') == [etree[1]], \
            'function as key should get element as value'
        assert traverse_obj(etree, lambda i, _: i == 1) == [etree[1]], \
            'function as key should get index as key'
        assert traverse_obj(etree, 0) == etree[0], \
            'int key should return the nth child'
        expected = ['Austria', 'Switzerland', 'Malaysia', 'Costa Rica', 'Colombia']
        assert traverse_obj(etree, './/neighbor/@name') == expected, \
            '`@<attribute>` at end of path should give that attribute'
        assert traverse_obj(etree, '//neighbor/@fail') == [None, None, None, None, None], \
            '`@<nonexistent>` at end of path should give `None`'
        assert traverse_obj(etree, ('//neighbor/@', 2)) == {'name': 'Malaysia', 'direction': 'N'}, \
            '`@` should give the full attribute dict'
        assert traverse_obj(etree, '//year/text()') == ['2008', '2011', '2011'], \
            '`text()` at end of path should give the inner text'
        assert traverse_obj(etree, '//*[@direction]/@direction') == ['E', 'W', 'N', 'W', 'E'], \
            'full Python xpath features should be supported'
        assert traverse_obj(etree, (0, '@name')) == 'Liechtenstein', \
            'special transformations should act on current element'
        assert traverse_obj(etree, ('country', 0, ..., 'text()', {int_or_none})) == [1, 2008, 141100], \
            'special transformations should act on current element'

    def test_traversal_unbranching(self):
        assert traverse_obj(_TEST_DATA, [(100, 1.2), all]) == [100, 1.2], \
            '`all` should give all results as list'
        assert traverse_obj(_TEST_DATA, [(100, 1.2), any]) == 100, \
            '`any` should give the first result'
        assert traverse_obj(_TEST_DATA, [100, all]) == [100], \
            '`all` should give list if non branching'
        assert traverse_obj(_TEST_DATA, [100, any]) == 100, \
            '`any` should give single item if non branching'
        assert traverse_obj(_TEST_DATA, [('dict', 'None', 100), all]) == [100], \
            '`all` should filter `None` and empty dict'
        assert traverse_obj(_TEST_DATA, [('dict', 'None', 100), any]) == 100, \
            '`any` should filter `None` and empty dict'
        assert traverse_obj(_TEST_DATA, [{
            'all': [('dict', 'None', 100, 1.2), all],
            'any': [('dict', 'None', 100, 1.2), any],
        }]) == {'all': [100, 1.2], 'any': 100}, \
            '`all`/`any` should apply to each dict path separately'
        assert traverse_obj(_TEST_DATA, [{
            'all': [('dict', 'None', 100, 1.2), all],
            'any': [('dict', 'None', 100, 1.2), any],
        }], get_all=False) == {'all': [100, 1.2], 'any': 100}, \
            '`all`/`any` should apply to dict regardless of `get_all`'
        assert traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, {float}]) is None, \
            '`all` should reset branching status'
        assert traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), any, {float}]) is None, \
            '`any` should reset branching status'
        assert traverse_obj(_TEST_DATA, [('dict', 'None', 100, 1.2), all, ..., {float}]) == [1.2], \
            '`all` should allow further branching'
        assert traverse_obj(_TEST_DATA, [('dict', 'None', 'urls', 'data'), any, ..., 'index']) == [0, 1], \
            '`any` should allow further branching'

    def test_traversal_morsel(self):
        values = {
            'expires': 'a',
            'path': 'b',
            'comment': 'c',
            'domain': 'd',
            'max-age': 'e',
            'secure': 'f',
            'httponly': 'g',
            'version': 'h',
            'samesite': 'i',
        }
        morsel = http.cookies.Morsel()
        morsel.set('item_key', 'item_value', 'coded_value')
        morsel.update(values)
        values['key'] = 'item_key'
        values['value'] = 'item_value'

        for key, value in values.items():
            assert traverse_obj(morsel, key) == value, \
                'Morsel should provide access to all values'
        assert traverse_obj(morsel, ...) == list(values.values()), \
            '`...` should yield all values'
        assert traverse_obj(morsel, lambda k, v: True) == list(values.values()), \
            'function key should yield all values'
        assert traverse_obj(morsel, [(None,), any]) == morsel, \
            'Morsel should not be implicitly changed to dict on usage'
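

# dict_get (exercised below) is the older, simpler relative of traverse_obj:
# it tries each given key in order and skips false-y values unless
# skip_false_values=False. An informal example mirroring the assertions below:
#   dict_get({'a': None, 'b': 2}, ('a', 'b'))  # -> 2 (the None is skipped)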
class TestDictGet:
    def test_dict_get(self):
        FALSE_VALUES = {
            'none': None,
            'false': False,
            'zero': 0,
            'empty_string': '',
            'empty_list': [],
        }
        d = {**FALSE_VALUES, 'a': 42}

        assert dict_get(d, 'a') == 42
        assert dict_get(d, 'b') is None
        assert dict_get(d, 'b', 42) == 42
        assert dict_get(d, ('a',)) == 42
        assert dict_get(d, ('b', 'a')) == 42
        assert dict_get(d, ('b', 'c', 'a', 'd')) == 42
        assert dict_get(d, ('b', 'c')) is None
        assert dict_get(d, ('b', 'c'), 42) == 42
        for key, false_value in FALSE_VALUES.items():
            assert dict_get(d, ('b', 'c', key)) is None
            assert dict_get(d, ('b', 'c', key), skip_false_values=False) == false_value


yt-dlp-2024.09.27/test/test_update.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import FakeYDL, report_warning
from yt_dlp.update import UpdateInfo, Updater


# XXX: Keep in sync with yt_dlp.update.UPDATE_SOURCES
TEST_UPDATE_SOURCES = {
    'stable': 'yt-dlp/yt-dlp',
    'nightly': 'yt-dlp/yt-dlp-nightly-builds',
    'master': 'yt-dlp/yt-dlp-master-builds',
}

TEST_API_DATA = {
    'yt-dlp/yt-dlp/latest': {
        'tag_name': '2023.12.31',
        'target_commitish': 'bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb',
        'name': 'yt-dlp 2023.12.31',
        'body': 'BODY',
    },
    'yt-dlp/yt-dlp-nightly-builds/latest': {
        'tag_name': '2023.12.31.123456',
        'target_commitish': 'master',
        'name': 'yt-dlp nightly 2023.12.31.123456',
        'body': 'Generated from: https://github.com/yt-dlp/yt-dlp/commit/cccccccccccccccccccccccccccccccccccccccc',
    },
    'yt-dlp/yt-dlp-master-builds/latest': {
        'tag_name': '2023.12.31.987654',
        'target_commitish': 'master',
        'name': 'yt-dlp master 2023.12.31.987654',
        'body': 'Generated from: https://github.com/yt-dlp/yt-dlp/commit/dddddddddddddddddddddddddddddddddddddddd',
    },
    'yt-dlp/yt-dlp/tags/testing': {
        'tag_name': 'testing',
        'target_commitish': '9999999999999999999999999999999999999999',
        'name': 'testing',
        'body': 'BODY',
    },
    'fork/yt-dlp/latest': {
        'tag_name': '2050.12.31',
        'target_commitish': 'eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee',
        'name': '2050.12.31',
        'body': 'BODY',
    },
    'fork/yt-dlp/tags/pr0000': {
        'tag_name': 'pr0000',
        'target_commitish': 'ffffffffffffffffffffffffffffffffffffffff',
        'name': 'pr1234 2023.11.11.000000',
        'body': 'BODY',
    },
    'fork/yt-dlp/tags/pr1234': {
        'tag_name': 'pr1234',
        'target_commitish': '0000000000000000000000000000000000000000',
        'name': 'pr1234 2023.12.31.555555',
        'body': 'BODY',
    },
    'fork/yt-dlp/tags/pr9999': {
        'tag_name': 'pr9999',
        'target_commitish': '1111111111111111111111111111111111111111',
        'name': 'pr9999',
        'body': 'BODY',
    },
    'fork/yt-dlp-satellite/tags/pr987': {
        'tag_name': 'pr987',
        'target_commitish': 'master',
        'name': 'pr987',
        'body': 'Generated from: https://github.com/yt-dlp/yt-dlp/commit/2222222222222222222222222222222222222222',
    },
}

TEST_LOCKFILE_COMMENT = '# This file is used for regulating self-update'

TEST_LOCKFILE_V1 = rf'''{TEST_LOCKFILE_COMMENT}
lock 2022.08.18.36 .+ Python 3\.6
lock 2023.11.16 (?!win_x86_exe).+ Python 3\.7
lock 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
'''

TEST_LOCKFILE_V2_TMPL = r'''%s
lockV2 yt-dlp/yt-dlp 2022.08.18.36 .+ Python 3\.6
lockV2 yt-dlp/yt-dlp 2023.11.16 (?!win_x86_exe).+ Python 3\.7
lockV2 yt-dlp/yt-dlp 2023.11.16 win_x86_exe .+ Windows-(?:Vista|2008Server)
lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 (?!win_x86_exe).+ Python 3\.7
lockV2 yt-dlp/yt-dlp-nightly-builds 2023.11.15.232826 win_x86_exe .+ Windows-(?:Vista|2008Server)
lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 (?!win_x86_exe).+ Python 3\.7
lockV2 yt-dlp/yt-dlp-master-builds 2023.11.15.232812 win_x86_exe .+ Windows-(?:Vista|2008Server)
'''

TEST_LOCKFILE_V2 = TEST_LOCKFILE_V2_TMPL % TEST_LOCKFILE_COMMENT

TEST_LOCKFILE_ACTUAL = TEST_LOCKFILE_V2_TMPL % TEST_LOCKFILE_V1.rstrip('\n')

TEST_LOCKFILE_FORK = rf'''{TEST_LOCKFILE_ACTUAL}# Test if a fork blocks updates to non-numeric tags
lockV2 fork/yt-dlp pr0000 .+ Python 3.6
lockV2 fork/yt-dlp pr1234 (?!win_x86_exe).+ Python 3\.7
lockV2 fork/yt-dlp pr1234 win_x86_exe .+ Windows-(?:Vista|2008Server)
lockV2 fork/yt-dlp pr9999 .+ Python 3.11
'''


class FakeUpdater(Updater):
    current_version = '2022.01.01'
    current_commit = 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'

    _channel = 'stable'
    _origin = 'yt-dlp/yt-dlp'
    _update_sources = TEST_UPDATE_SOURCES

    def _download_update_spec(self, *args, **kwargs):
        return TEST_LOCKFILE_ACTUAL

    def _call_api(self, tag):
        tag = f'tags/{tag}' if tag != 'latest' else tag
        return TEST_API_DATA[f'{self.requested_repo}/{tag}']

    def _report_error(self, msg, *args, **kwargs):
        report_warning(msg)


class TestUpdate(unittest.TestCase):
    maxDiff = None

    def test_update_spec(self):
        ydl = FakeYDL()
        updater = FakeUpdater(ydl, 'stable')

        def test(lockfile, identifier, input_tag, expect_tag, exact=False, repo='yt-dlp/yt-dlp'):
            updater._identifier = identifier
            updater._exact = exact
            updater.requested_repo = repo
            result = updater._process_update_spec(lockfile, input_tag)
            self.assertEqual(
                result, expect_tag,
                f'{identifier!r} requesting {repo}@{input_tag} (exact={exact}) '
                f'returned {result!r} instead of {expect_tag!r}')

        for lockfile in (TEST_LOCKFILE_V1, TEST_LOCKFILE_V2, TEST_LOCKFILE_ACTUAL, TEST_LOCKFILE_FORK):
            # Normal operation
            test(lockfile, 'zip Python 3.12.0', '2023.12.31', '2023.12.31')
            test(lockfile, 'zip stable Python 3.12.0', '2023.12.31', '2023.12.31', exact=True)
            # Python 3.6 --update should update only to its lock
            test(lockfile, 'zip Python 3.6.0', '2023.11.16', '2022.08.18.36')
            # --update-to an exact version later than the lock should return None
            test(lockfile, 'zip stable Python 3.6.0', '2023.11.16', None, exact=True)
            # Python 3.7 should be able to update to its lock
            test(lockfile, 'zip Python 3.7.0', '2023.11.16', '2023.11.16')
            test(lockfile, 'zip stable Python 3.7.1', '2023.11.16', '2023.11.16', exact=True)
            # Non-win_x86_exe builds on py3.7 must be locked
            test(lockfile, 'zip Python 3.7.1', '2023.12.31', '2023.11.16')
            test(lockfile, 'zip stable Python 3.7.1', '2023.12.31', None, exact=True)
            test(  # Windows Vista w/ win_x86_exe must be locked
                lockfile, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2',
                '2023.12.31', '2023.11.16')
            test(  # Windows 2008Server w/ win_x86_exe must be locked
                lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-2008Server',
                '2023.12.31', None, exact=True)
            test(  # Windows 7 w/ win_x86_exe py3.7 build should be able to update beyond lock
                lockfile, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1',
                '2023.12.31', '2023.12.31')
            test(  # Windows 8.1 w/ '2008Server' in platform string should be able to update beyond lock
                lockfile, 'win_x86_exe Python 3.7.9 (CPython x86 32bit) - Windows-post2008Server-6.2.9200',
                '2023.12.31', '2023.12.31', exact=True)

        # Forks can block updates to non-numeric tags rather than lock
        test(TEST_LOCKFILE_FORK, 'zip Python 3.6.3', 'pr0000', None, repo='fork/yt-dlp')
        test(TEST_LOCKFILE_FORK, 'zip stable Python 3.7.4', 'pr0000', 'pr0000', repo='fork/yt-dlp')
        test(TEST_LOCKFILE_FORK, 'zip stable Python 3.7.4', 'pr1234', None, repo='fork/yt-dlp')
        test(TEST_LOCKFILE_FORK, 'zip Python 3.8.1', 'pr1234', 'pr1234', repo='fork/yt-dlp', exact=True)
        test(
            TEST_LOCKFILE_FORK, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-Vista-6.0.6003-SP2',
            'pr1234', None, repo='fork/yt-dlp')
        test(
            TEST_LOCKFILE_FORK, 'win_x86_exe stable Python 3.7.9 (CPython x86 32bit) - Windows-7-6.1.7601-SP1',
            '2023.12.31', '2023.12.31', repo='fork/yt-dlp')
        test(TEST_LOCKFILE_FORK, 'zip Python 3.11.2', 'pr9999', None, repo='fork/yt-dlp', exact=True)
        test(TEST_LOCKFILE_FORK, 'zip stable Python 3.12.0', 'pr9999', 'pr9999', repo='fork/yt-dlp')
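
    # An informal reading of the lockfile grammar exercised above: each
    # `lock <tag> <regex>` (V1) or `lockV2 <repo> <tag> <regex>` line caps
    # updates at <tag> for builds whose identifier string matches <regex>,
    # while a lock on a non-numeric tag (e.g. `pr1234`) blocks that tag
    # outright instead of capping.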
    def test_query_update(self):
        ydl = FakeYDL()

        def test(target, expected, current_version=None, current_commit=None, identifier=None):
            updater = FakeUpdater(ydl, target)
            if current_version:
                updater.current_version = current_version
            if current_commit:
                updater.current_commit = current_commit
            updater._identifier = identifier or 'zip'
            update_info = updater.query_update(_output=True)
            self.assertDictEqual(
                update_info.__dict__ if update_info else {},
                expected.__dict__ if expected else {})

        test('yt-dlp/yt-dlp@latest', UpdateInfo(
            '2023.12.31', version='2023.12.31', requested_version='2023.12.31', commit='b' * 40))
        test('yt-dlp/yt-dlp-nightly-builds@latest', UpdateInfo(
            '2023.12.31.123456', version='2023.12.31.123456', requested_version='2023.12.31.123456', commit='c' * 40))
        test('yt-dlp/yt-dlp-master-builds@latest', UpdateInfo(
            '2023.12.31.987654', version='2023.12.31.987654', requested_version='2023.12.31.987654', commit='d' * 40))
        test('fork/yt-dlp@latest', UpdateInfo(
            '2050.12.31', version='2050.12.31', requested_version='2050.12.31', commit='e' * 40))
        test('fork/yt-dlp@pr0000', UpdateInfo(
            'pr0000', version='2023.11.11.000000', requested_version='2023.11.11.000000', commit='f' * 40))
        test('fork/yt-dlp@pr1234', UpdateInfo(
            'pr1234', version='2023.12.31.555555', requested_version='2023.12.31.555555', commit='0' * 40))
        test('fork/yt-dlp@pr9999', UpdateInfo(
            'pr9999', version=None, requested_version=None, commit='1' * 40))
        test('fork/yt-dlp-satellite@pr987', UpdateInfo(
            'pr987', version=None, requested_version=None, commit='2' * 40))
        test('yt-dlp/yt-dlp', None, current_version='2024.01.01')
        test('stable', UpdateInfo(
            '2023.12.31', version='2023.12.31', requested_version='2023.12.31', commit='b' * 40))
        test('nightly', UpdateInfo(
            '2023.12.31.123456', version='2023.12.31.123456', requested_version='2023.12.31.123456', commit='c' * 40))
        test('master', UpdateInfo(
            '2023.12.31.987654', version='2023.12.31.987654', requested_version='2023.12.31.987654', commit='d' * 40))
        test('testing', None, current_commit='9' * 40)
        test('testing', UpdateInfo('testing', commit='9' * 40))

if __name__ == '__main__':
    unittest.main()


yt-dlp-2024.09.27/test/test_utils.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest
import warnings
import datetime as dt

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import contextlib
import io
import itertools
import json
import subprocess
import xml.etree.ElementTree

from yt_dlp.compat import (
    compat_etree_fromstring,
    compat_HTMLParseError,
    compat_os_name,
)
from yt_dlp.utils import (
    Config,
    DateRange,
    ExtractorError,
    InAdvancePagedList,
    LazyList,
    NO_DEFAULT,
    OnDemandPagedList,
    Popen,
    age_restricted,
    args_to_str,
    base_url,
    caesar,
    clean_html,
    clean_podcast_url,
    cli_bool_option,
    cli_option,
    cli_valueless_option,
    date_from_str,
    datetime_from_str,
    detect_exe_version,
    determine_ext,
    determine_file_encoding,
    dfxp2srt,
    encode_base_n,
    encode_compat_str,
    encodeFilename,
    expand_path,
    extract_attributes,
    extract_basic_auth,
    find_xpath_attr,
    fix_xml_ampersands,
    float_or_none,
    format_bytes,
    get_compatible_ext,
    get_element_by_attribute,
    get_element_by_class,
    get_element_html_by_attribute,
    get_element_html_by_class,
    get_element_text_and_html_by_tag,
    get_elements_by_attribute,
    get_elements_by_class,
    get_elements_html_by_attribute,
    get_elements_html_by_class,
    get_elements_text_and_html_by_attribute,
    int_or_none,
    intlist_to_bytes,
    iri_to_uri,
    is_html,
    js_to_json,
    limit_length,
    locked_file,
    lowercase_escape,
    match_str,
    merge_dicts,
    mimetype2ext,
    month_by_name,
    multipart_encode,
    ohdave_rsa_encrypt,
    orderedSet,
    parse_age_limit,
    parse_bitrate,
    parse_codecs,
    parse_count,
    parse_dfxp_time_expr,
    parse_duration,
    parse_filesize,
    parse_iso8601,
    parse_qs,
    parse_resolution,
    pkcs1pad,
    prepend_extension,
    read_batch_urls,
    remove_end,
    remove_quotes,
    remove_start,
    render_table,
    replace_extension,
    rot47,
    sanitize_filename,
    sanitize_path,
    sanitize_url,
    shell_quote,
    smuggle_url,
    str_to_int,
    strip_jsonp,
    strip_or_none,
    subtitles_filename,
    timeconvert,
    try_call,
    unescapeHTML,
    unified_strdate,
    unified_timestamp,
    unsmuggle_url,
    update_url_query,
    uppercase_escape,
    url_basename,
    url_or_none,
    urlencode_postdata,
    urljoin,
    urshift,
    variadic,
    version_tuple,
    xpath_attr,
    xpath_element,
    xpath_text,
    xpath_with_ns,
)
from yt_dlp.utils._utils import _UnsafeExtensionError
from yt_dlp.utils.networking import (
    HTTPHeaderDict,
    escape_rfc3986,
    normalize_url,
    remove_dot_segments,
)


class TestUtil(unittest.TestCase):
    def test_timeconvert(self):
        self.assertTrue(timeconvert('') is None)
        self.assertTrue(timeconvert('bougrg') is None)
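
    # sanitize_filename (below) replaces characters that are unsafe in file
    # names with safe look-alikes or underscores, e.g. the assertions show
    # sanitize_filename('abc/de') -> 'abc⧸de'; restricted=True additionally
    # folds the result down to ASCII.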
    def test_sanitize_filename(self):
        self.assertEqual(sanitize_filename(''), '')
        self.assertEqual(sanitize_filename('abc'), 'abc')
        self.assertEqual(sanitize_filename('abc_d-e'), 'abc_d-e')
        self.assertEqual(sanitize_filename('123'), '123')

        self.assertEqual('abc⧸de', sanitize_filename('abc/de'))
        self.assertFalse('/' in sanitize_filename('abc/de///'))

        self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', is_id=False))
        self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', is_id=False))
        self.assertEqual('yes no', sanitize_filename('yes? no', is_id=False))
        self.assertEqual('this - that', sanitize_filename('this: that', is_id=False))

        self.assertEqual(sanitize_filename('AT&T'), 'AT&T')
        aumlaut = 'ä'
        self.assertEqual(sanitize_filename(aumlaut), aumlaut)
        tests = '\u043a\u0438\u0440\u0438\u043b\u043b\u0438\u0446\u0430'
        self.assertEqual(sanitize_filename(tests), tests)

        self.assertEqual(
            sanitize_filename('New World record at 0:12:34'),
            'New World record at 0_12_34')

        self.assertEqual(sanitize_filename('--gasdgf'), '--gasdgf')
        self.assertEqual(sanitize_filename('--gasdgf', is_id=True), '--gasdgf')
        self.assertEqual(sanitize_filename('--gasdgf', is_id=False), '_-gasdgf')
        self.assertEqual(sanitize_filename('.gasdgf'), '.gasdgf')
        self.assertEqual(sanitize_filename('.gasdgf', is_id=True), '.gasdgf')
        self.assertEqual(sanitize_filename('.gasdgf', is_id=False), 'gasdgf')

        forbidden = '"\0\\/'
        for fc in forbidden:
            for fbc in forbidden:
                self.assertTrue(fbc not in sanitize_filename(fc))

    def test_sanitize_filename_restricted(self):
        self.assertEqual(sanitize_filename('abc', restricted=True), 'abc')
        self.assertEqual(sanitize_filename('abc_d-e', restricted=True), 'abc_d-e')
        self.assertEqual(sanitize_filename('123', restricted=True), '123')

        self.assertEqual('abc_de', sanitize_filename('abc/de', restricted=True))
        self.assertFalse('/' in sanitize_filename('abc/de///', restricted=True))

        self.assertEqual('abc_de', sanitize_filename('abc/<>\\*|de', restricted=True))
        self.assertEqual('xxx', sanitize_filename('xxx/<>\\*|', restricted=True))
        self.assertEqual('yes_no', sanitize_filename('yes? no', restricted=True))
        self.assertEqual('this_-_that', sanitize_filename('this: that', restricted=True))

        tests = 'aäb\u4e2d\u56fd\u7684c'
        self.assertEqual(sanitize_filename(tests, restricted=True), 'aab_c')
        self.assertTrue(sanitize_filename('\xf6', restricted=True) != '')  # No empty filename

        forbidden = '"\0\\/&!: \'\t\n()[]{}$;`^,#'
        for fc in forbidden:
            for fbc in forbidden:
                self.assertTrue(fbc not in sanitize_filename(fc, restricted=True))

        # Handle a common case more neatly
        self.assertEqual(sanitize_filename('\u5927\u58f0\u5e26 - Song', restricted=True), 'Song')
        self.assertEqual(sanitize_filename('\u603b\u7edf: Speech', restricted=True), 'Speech')
        # .. but make sure the file name is never empty
        self.assertTrue(sanitize_filename('-', restricted=True) != '')
        self.assertTrue(sanitize_filename(':', restricted=True) != '')

        self.assertEqual(sanitize_filename(
            'ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ', restricted=True),
            'AAAAAAAECEEEEIIIIDNOOOOOOOOEUUUUUYTHssaaaaaaaeceeeeiiiionooooooooeuuuuuythy')

    def test_sanitize_ids(self):
        self.assertEqual(sanitize_filename('_n_cd26wFpw', is_id=True), '_n_cd26wFpw')
        self.assertEqual(sanitize_filename('_BD_eEpuzXw', is_id=True), '_BD_eEpuzXw')
        self.assertEqual(sanitize_filename('N0Y__7-UOdI', is_id=True), 'N0Y__7-UOdI')

    def test_sanitize_path(self):
        if sys.platform != 'win32':
            return

        self.assertEqual(sanitize_path('abc'), 'abc')
        self.assertEqual(sanitize_path('abc/def'), 'abc\\def')
        self.assertEqual(sanitize_path('abc\\def'), 'abc\\def')
        self.assertEqual(sanitize_path('abc|def'), 'abc#def')
        self.assertEqual(sanitize_path('<>:"|?*'), '#######')
        self.assertEqual(sanitize_path('C:/abc/def'), 'C:\\abc\\def')
        self.assertEqual(sanitize_path('C?:/abc/def'), 'C##\\abc\\def')

        self.assertEqual(sanitize_path('\\\\?\\UNC\\ComputerName\\abc'), '\\\\?\\UNC\\ComputerName\\abc')
        self.assertEqual(sanitize_path('\\\\?\\UNC/ComputerName/abc'), '\\\\?\\UNC\\ComputerName\\abc')

        self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')
        self.assertEqual(sanitize_path('\\\\?\\C:/abc'), '\\\\?\\C:\\abc')
        self.assertEqual(sanitize_path('\\\\?\\C:\\ab?c\\de:f'), '\\\\?\\C:\\ab#c\\de#f')
        self.assertEqual(sanitize_path('\\\\?\\C:\\abc'), '\\\\?\\C:\\abc')

        self.assertEqual(
            sanitize_path('youtube/%(uploader)s/%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s'),
            'youtube\\%(uploader)s\\%(autonumber)s-%(title)s-%(upload_date)s.%(ext)s')

        self.assertEqual(
            sanitize_path('youtube/TheWreckingYard ./00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part'),
            'youtube\\TheWreckingYard #\\00001-Not bad, Especially for Free! (1987 Yamaha 700)-20141116.mp4.part')
        self.assertEqual(sanitize_path('abc/def...'), 'abc\\def..#')
        self.assertEqual(sanitize_path('abc.../def'), 'abc..#\\def')
        self.assertEqual(sanitize_path('abc.../def...'), 'abc..#\\def..#')

        self.assertEqual(sanitize_path('../abc'), '..\\abc')
        self.assertEqual(sanitize_path('../../abc'), '..\\..\\abc')
        self.assertEqual(sanitize_path('./abc'), 'abc')
        self.assertEqual(sanitize_path('./../abc'), '..\\abc')

    def test_sanitize_url(self):
        self.assertEqual(sanitize_url('//foo.bar'), 'http://foo.bar')
        self.assertEqual(sanitize_url('httpss://foo.bar'), 'https://foo.bar')
        self.assertEqual(sanitize_url('rmtps://foo.bar'), 'rtmps://foo.bar')
        self.assertEqual(sanitize_url('https://foo.bar'), 'https://foo.bar')
        self.assertEqual(sanitize_url('foo bar'), 'foo bar')

    def test_expand_path(self):
        def env(var):
            return f'%{var}%' if sys.platform == 'win32' else f'${var}'

        os.environ['yt_dlp_EXPATH_PATH'] = 'expanded'
        self.assertEqual(expand_path(env('yt_dlp_EXPATH_PATH')), 'expanded')

        old_home = os.environ.get('HOME')
        test_str = R'C:\Documents and Settings\тест\Application Data'
        try:
            os.environ['HOME'] = test_str
            self.assertEqual(expand_path(env('HOME')), os.getenv('HOME'))
            self.assertEqual(expand_path('~'), os.getenv('HOME'))
            self.assertEqual(
                expand_path('~/{}'.format(env('yt_dlp_EXPATH_PATH'))),
                '{}/expanded'.format(os.getenv('HOME')))
        finally:
            os.environ['HOME'] = old_home or ''

    _uncommon_extensions = [
        ('exe', 'abc.exe.ext'),
        ('de', 'abc.de.ext'),
        ('../.mp4', None),
        ('..\\.mp4', None),
    ]

    def test_prepend_extension(self):
        self.assertEqual(prepend_extension('abc.ext', 'temp'), 'abc.temp.ext')
        self.assertEqual(prepend_extension('abc.ext', 'temp', 'ext'), 'abc.temp.ext')
        self.assertEqual(prepend_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp')
        self.assertEqual(prepend_extension('abc', 'temp'), 'abc.temp')
        self.assertEqual(prepend_extension('.abc', 'temp'), '.abc.temp')
        self.assertEqual(prepend_extension('.abc.ext', 'temp'), '.abc.temp.ext')

        # Test uncommon extensions
        self.assertEqual(prepend_extension('abc.ext', 'bin'), 'abc.bin.ext')
        for ext, result in self._uncommon_extensions:
            with self.assertRaises(_UnsafeExtensionError):
                prepend_extension('abc', ext)
            if result:
                self.assertEqual(prepend_extension('abc.ext', ext, 'ext'), result)
            else:
                with self.assertRaises(_UnsafeExtensionError):
                    prepend_extension('abc.ext', ext, 'ext')
                with self.assertRaises(_UnsafeExtensionError):
                    prepend_extension('abc.unexpected_ext', ext, 'ext')

    def test_replace_extension(self):
        self.assertEqual(replace_extension('abc.ext', 'temp'), 'abc.temp')
        self.assertEqual(replace_extension('abc.ext', 'temp', 'ext'), 'abc.temp')
        self.assertEqual(replace_extension('abc.unexpected_ext', 'temp', 'ext'), 'abc.unexpected_ext.temp')
        self.assertEqual(replace_extension('abc', 'temp'), 'abc.temp')
        self.assertEqual(replace_extension('.abc', 'temp'), '.abc.temp')
        self.assertEqual(replace_extension('.abc.ext', 'temp'), '.abc.temp')

        # Test uncommon extensions
        self.assertEqual(replace_extension('abc.ext', 'bin'), 'abc.unknown_video')
        for ext, _ in self._uncommon_extensions:
            with self.assertRaises(_UnsafeExtensionError):
                replace_extension('abc', ext)
            with self.assertRaises(_UnsafeExtensionError):
                replace_extension('abc.ext', ext, 'ext')
            with self.assertRaises(_UnsafeExtensionError):
                replace_extension('abc.unexpected_ext', ext, 'ext')

    def test_subtitles_filename(self):
        self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt'), 'abc.en.vtt')
        self.assertEqual(subtitles_filename('abc.ext', 'en', 'vtt', 'ext'), 'abc.en.vtt')
        self.assertEqual(subtitles_filename('abc.unexpected_ext', 'en', 'vtt', 'ext'), 'abc.unexpected_ext.en.vtt')
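
    # remove_start/remove_end (below) are None-safe prefix/suffix strippers
    # that return the input unchanged when the affix does not match:
    #   remove_start('B - A', 'A - ')  # -> 'B - A'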
    def test_remove_start(self):
        self.assertEqual(remove_start(None, 'A - '), None)
        self.assertEqual(remove_start('A - B', 'A - '), 'B')
        self.assertEqual(remove_start('B - A', 'A - '), 'B - A')

    def test_remove_end(self):
        self.assertEqual(remove_end(None, ' - B'), None)
        self.assertEqual(remove_end('A - B', ' - B'), 'A')
        self.assertEqual(remove_end('B - A', ' - B'), 'B - A')

    def test_remove_quotes(self):
        self.assertEqual(remove_quotes(None), None)
        self.assertEqual(remove_quotes('"'), '"')
        self.assertEqual(remove_quotes("'"), "'")
        self.assertEqual(remove_quotes(';'), ';')
        self.assertEqual(remove_quotes('";'), '";')
        self.assertEqual(remove_quotes('""'), '')
        self.assertEqual(remove_quotes('";"'), ';')

    def test_ordered_set(self):
        self.assertEqual(orderedSet([1, 1, 2, 3, 4, 4, 5, 6, 7, 3, 5]), [1, 2, 3, 4, 5, 6, 7])
        self.assertEqual(orderedSet([]), [])
        self.assertEqual(orderedSet([1]), [1])
        # keep the list ordered
        self.assertEqual(orderedSet([135, 1, 1, 1]), [135, 1])

    def test_unescape_html(self):
        self.assertEqual(unescapeHTML('%20;'), '%20;')
        self.assertEqual(unescapeHTML('&#x2F;'), '/')
        self.assertEqual(unescapeHTML('&#47;'), '/')
        self.assertEqual(unescapeHTML('&eacute;'), 'é')
        self.assertEqual(unescapeHTML('&#2013266066;'), '&#2013266066;')
        self.assertEqual(unescapeHTML('&a&quot;'), '&a"')
        # HTML5 entities
        self.assertEqual(unescapeHTML('&period;&apos;'), '.\'')

    def test_date_from_str(self):
        self.assertEqual(date_from_str('yesterday'), date_from_str('now-1day'))
        self.assertEqual(date_from_str('now+7day'), date_from_str('now+1week'))
        self.assertEqual(date_from_str('now+14day'), date_from_str('now+2week'))
        self.assertEqual(date_from_str('20200229+365day'), date_from_str('20200229+1year'))
        self.assertEqual(date_from_str('20210131+28day'), date_from_str('20210131+1month'))

    def test_datetime_from_str(self):
        self.assertEqual(datetime_from_str('yesterday', precision='day'), datetime_from_str('now-1day', precision='auto'))
        self.assertEqual(datetime_from_str('now+7day', precision='day'), datetime_from_str('now+1week', precision='auto'))
        self.assertEqual(datetime_from_str('now+14day', precision='day'), datetime_from_str('now+2week', precision='auto'))
        self.assertEqual(datetime_from_str('20200229+365day', precision='day'), datetime_from_str('20200229+1year', precision='auto'))
        self.assertEqual(datetime_from_str('20210131+28day', precision='day'), datetime_from_str('20210131+1month', precision='auto'))
        self.assertEqual(datetime_from_str('20210131+59day', precision='day'), datetime_from_str('20210131+2month', precision='auto'))
        self.assertEqual(datetime_from_str('now+1day', precision='hour'), datetime_from_str('now+24hours', precision='auto'))
        self.assertEqual(datetime_from_str('now+23hours', precision='hour'), datetime_from_str('now+23hours', precision='auto'))

    def test_daterange(self):
        _20century = DateRange('19000101', '20000101')
        self.assertFalse('17890714' in _20century)
        _ac = DateRange('00010101')
        self.assertTrue('19690721' in _ac)
        _firstmilenium = DateRange(end='10000101')
        self.assertTrue('07110427' in _firstmilenium)

    def test_unified_dates(self):
        self.assertEqual(unified_strdate('December 21, 2010'), '20101221')
        self.assertEqual(unified_strdate('8/7/2009'), '20090708')
        self.assertEqual(unified_strdate('Dec 14, 2012'), '20121214')
        self.assertEqual(unified_strdate('2012/10/11 01:56:38 +0000'), '20121011')
        self.assertEqual(unified_strdate('1968 12 10'), '19681210')
        self.assertEqual(unified_strdate('1968-12-10'), '19681210')
        self.assertEqual(unified_strdate('31-07-2022 20:00'), '20220731')
        self.assertEqual(unified_strdate('28/01/2014 21:00:00 +0100'), '20140128')
        self.assertEqual(
            unified_strdate('11/26/2014 11:30:00 AM PST', day_first=False),
            '20141126')
        self.assertEqual(
            unified_strdate('2/2/2015 6:47:40 PM', day_first=False),
            '20150202')
        self.assertEqual(unified_strdate('Feb 14th 2016 5:45PM'), '20160214')
        self.assertEqual(unified_strdate('25-09-2014'), '20140925')
        self.assertEqual(unified_strdate('27.02.2016 17:30'), '20160227')
        self.assertEqual(unified_strdate('UNKNOWN DATE FORMAT'), None)
        self.assertEqual(unified_strdate('Feb 7, 2016 at 6:35 pm'), '20160207')
        self.assertEqual(unified_strdate('July 15th, 2013'), '20130715')
        self.assertEqual(unified_strdate('September 1st, 2013'), '20130901')
        self.assertEqual(unified_strdate('Sep 2nd, 2013'), '20130902')
        self.assertEqual(unified_strdate('November 3rd, 2019'), '20191103')
        self.assertEqual(unified_strdate('October 23rd, 2005'), '20051023')
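
    # unified_strdate (above) normalizes many date spellings to 'YYYYMMDD';
    # unified_timestamp (below) covers the same formats but yields Unix epoch
    # seconds, honouring any timezone offset present in the string.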
    def test_unified_timestamps(self):
        self.assertEqual(unified_timestamp('December 21, 2010'), 1292889600)
        self.assertEqual(unified_timestamp('8/7/2009'), 1247011200)
        self.assertEqual(unified_timestamp('Dec 14, 2012'), 1355443200)
        self.assertEqual(unified_timestamp('2012/10/11 01:56:38 +0000'), 1349920598)
        self.assertEqual(unified_timestamp('1968 12 10'), -33436800)
        self.assertEqual(unified_timestamp('1968-12-10'), -33436800)
        self.assertEqual(unified_timestamp('28/01/2014 21:00:00 +0100'), 1390939200)
        self.assertEqual(
            unified_timestamp('11/26/2014 11:30:00 AM PST', day_first=False),
            1417001400)
        self.assertEqual(
            unified_timestamp('2/2/2015 6:47:40 PM', day_first=False),
            1422902860)
        self.assertEqual(unified_timestamp('Feb 14th 2016 5:45PM'), 1455471900)
        self.assertEqual(unified_timestamp('25-09-2014'), 1411603200)
        self.assertEqual(unified_timestamp('27.02.2016 17:30'), 1456594200)
        self.assertEqual(unified_timestamp('UNKNOWN DATE FORMAT'), None)
        self.assertEqual(unified_timestamp('May 16, 2016 11:15 PM'), 1463440500)
        self.assertEqual(unified_timestamp('Feb 7, 2016 at 6:35 pm'), 1454870100)
        self.assertEqual(unified_timestamp('2017-03-30T17:52:41Q'), 1490896361)
        self.assertEqual(unified_timestamp('Sep 11, 2013 | 5:49 AM'), 1378878540)
        self.assertEqual(unified_timestamp('December 15, 2017 at 7:49 am'), 1513324140)
        self.assertEqual(unified_timestamp('2018-03-14T08:32:43.1493874+00:00'), 1521016363)
        self.assertEqual(unified_timestamp('Sunday, 26 Nov 2006, 19:00'), 1164567600)
        self.assertEqual(unified_timestamp('wed, aug 16, 2008, 12:00pm'), 1218931200)

        self.assertEqual(unified_timestamp('December 31 1969 20:00:01 EDT'), 1)
        self.assertEqual(unified_timestamp('Wednesday 31 December 1969 18:01:26 MDT'), 86)
        self.assertEqual(unified_timestamp('12/31/1969 20:01:18 EDT', False), 78)

    def test_determine_ext(self):
        self.assertEqual(determine_ext('http://example.com/foo/bar.mp4/?download'), 'mp4')
        self.assertEqual(determine_ext('http://example.com/foo/bar/?download', None), None)
        self.assertEqual(determine_ext('http://example.com/foo/bar.nonext/?download', None), None)
        self.assertEqual(determine_ext('http://example.com/foo/bar/mp4?download', None), None)
        self.assertEqual(determine_ext('http://example.com/foo/bar.m3u8//?download'), 'm3u8')
        self.assertEqual(determine_ext('foobar', None), None)

    def test_find_xpath_attr(self):
        testxml = '''<root>
            <node/>
            <node x="a"/>
            <node x="a" y="c" />
            <node x="b" y="d" />
            <node x="" />
        </root>'''
        doc = compat_etree_fromstring(testxml)

        self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n'), None)
        self.assertEqual(find_xpath_attr(doc, './/fourohfour', 'n', 'v'), None)
        self.assertEqual(find_xpath_attr(doc, './/node', 'n'), None)
        self.assertEqual(find_xpath_attr(doc, './/node', 'n', 'v'), None)
        self.assertEqual(find_xpath_attr(doc, './/node', 'x'), doc[1])
        self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'a'), doc[1])
        self.assertEqual(find_xpath_attr(doc, './/node', 'x', 'b'), doc[3])
        self.assertEqual(find_xpath_attr(doc, './/node', 'y'), doc[2])
        self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'c'), doc[2])
        self.assertEqual(find_xpath_attr(doc, './/node', 'y', 'd'), doc[3])
        self.assertEqual(find_xpath_attr(doc, './/node', 'x', ''), doc[4])
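
    # xpath_with_ns (below) expands 'media:song'-style prefixes into
    # ElementTree's '{http://example.com/}song' Clark notation before querying,
    # so namespaced documents can be searched with readable paths.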
    def test_xpath_with_ns(self):
        testxml = '''<root xmlns:media="http://example.com/">
            <media:song>
                <media:author>The Author</media:author>
                <url>http://server.com/download.mp3</url>
            </media:song>
        </root>'''
        doc = compat_etree_fromstring(testxml)
        find = lambda p: doc.find(xpath_with_ns(p, {'media': 'http://example.com/'}))
        self.assertTrue(find('media:song') is not None)
        self.assertEqual(find('media:song/media:author').text, 'The Author')
        self.assertEqual(find('media:song/url').text, 'http://server.com/download.mp3')

    def test_xpath_element(self):
        doc = xml.etree.ElementTree.Element('root')
        div = xml.etree.ElementTree.SubElement(doc, 'div')
        p = xml.etree.ElementTree.SubElement(div, 'p')
        p.text = 'Foo'
        self.assertEqual(xpath_element(doc, 'div/p'), p)
        self.assertEqual(xpath_element(doc, ['div/p']), p)
        self.assertEqual(xpath_element(doc, ['div/bar', 'div/p']), p)
        self.assertEqual(xpath_element(doc, 'div/bar', default='default'), 'default')
        self.assertEqual(xpath_element(doc, ['div/bar'], default='default'), 'default')
        self.assertTrue(xpath_element(doc, 'div/bar') is None)
        self.assertTrue(xpath_element(doc, ['div/bar']) is None)
        self.assertTrue(xpath_element(doc, ['div/bar'], 'div/baz') is None)
        self.assertRaises(ExtractorError, xpath_element, doc, 'div/bar', fatal=True)
        self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar'], fatal=True)
        self.assertRaises(ExtractorError, xpath_element, doc, ['div/bar', 'div/baz'], fatal=True)

    def test_xpath_text(self):
        testxml = '''<root>
            <div>
                <p>Foo</p>
            </div>
        </root>'''
        doc = compat_etree_fromstring(testxml)
        self.assertEqual(xpath_text(doc, 'div/p'), 'Foo')
        self.assertEqual(xpath_text(doc, 'div/bar', default='default'), 'default')
        self.assertTrue(xpath_text(doc, 'div/bar') is None)
        self.assertRaises(ExtractorError, xpath_text, doc, 'div/bar', fatal=True)

    def test_xpath_attr(self):
        testxml = '''<root>
            <div>
                <p x="a">Foo</p>
            </div>
        </root>'''
        doc = compat_etree_fromstring(testxml)
        self.assertEqual(xpath_attr(doc, 'div/p', 'x'), 'a')
        self.assertEqual(xpath_attr(doc, 'div/bar', 'x'), None)
        self.assertEqual(xpath_attr(doc, 'div/p', 'y'), None)
        self.assertEqual(xpath_attr(doc, 'div/bar', 'x', default='default'), 'default')
        self.assertEqual(xpath_attr(doc, 'div/p', 'y', default='default'), 'default')
        self.assertRaises(ExtractorError, xpath_attr, doc, 'div/bar', 'x', fatal=True)
        self.assertRaises(ExtractorError, xpath_attr, doc, 'div/p', 'y', fatal=True)

    def test_smuggle_url(self):
        data = {'ö': 'ö', 'abc': [3]}
        url = 'https://foo.bar/baz?x=y#a'
        smug_url = smuggle_url(url, data)
        unsmug_url, unsmug_data = unsmuggle_url(smug_url)
        self.assertEqual(url, unsmug_url)
        self.assertEqual(data, unsmug_data)

        res_url, res_data = unsmuggle_url(url)
        self.assertEqual(res_url, url)
        self.assertEqual(res_data, None)

        smug_url = smuggle_url(url, {'a': 'b'})
        smug_smug_url = smuggle_url(smug_url, {'c': 'd'})
        res_url, res_data = unsmuggle_url(smug_smug_url)
        self.assertEqual(res_url, url)
        self.assertEqual(res_data, {'a': 'b', 'c': 'd'})

    def test_shell_quote(self):
        args = ['ffmpeg', '-i', encodeFilename('ñ€ß\'.mp4')]
        self.assertEqual(
            shell_quote(args),
            """ffmpeg -i 'ñ€ß'"'"'.mp4'""" if compat_os_name != 'nt' else '''ffmpeg -i "ñ€ß'.mp4"''')

    def test_float_or_none(self):
        self.assertEqual(float_or_none('42.42'), 42.42)
        self.assertEqual(float_or_none('42'), 42.0)
        self.assertEqual(float_or_none(''), None)
        self.assertEqual(float_or_none(None), None)
        self.assertEqual(float_or_none([]), None)
        self.assertEqual(float_or_none(set()), None)

    def test_int_or_none(self):
        self.assertEqual(int_or_none('42'), 42)
        self.assertEqual(int_or_none(''), None)
        self.assertEqual(int_or_none(None), None)
        self.assertEqual(int_or_none([]), None)
        self.assertEqual(int_or_none(set()), None)

    def test_str_to_int(self):
        self.assertEqual(str_to_int('123,456'), 123456)
        self.assertEqual(str_to_int('123.456'), 123456)
        self.assertEqual(str_to_int(523), 523)
        self.assertEqual(str_to_int('noninteger'), None)
        self.assertEqual(str_to_int([]), None)

    def test_url_basename(self):
        self.assertEqual(url_basename('http://foo.de/'), '')
        self.assertEqual(url_basename('http://foo.de/bar/baz'), 'baz')
        self.assertEqual(url_basename('http://foo.de/bar/baz?x=y'), 'baz')
        self.assertEqual(url_basename('http://foo.de/bar/baz#x=y'), 'baz')
        self.assertEqual(url_basename('http://foo.de/bar/baz/'), 'baz')
        self.assertEqual(
            url_basename('http://media.w3.org/2010/05/sintel/trailer.mp4'),
            'trailer.mp4')

    def test_base_url(self):
        self.assertEqual(base_url('http://foo.de/'), 'http://foo.de/')
        self.assertEqual(base_url('http://foo.de/bar'), 'http://foo.de/')
        self.assertEqual(base_url('http://foo.de/bar/'), 'http://foo.de/bar/')
        self.assertEqual(base_url('http://foo.de/bar/baz'), 'http://foo.de/bar/')
        self.assertEqual(base_url('http://foo.de/bar/baz?x=z/x/c'), 'http://foo.de/bar/')
        self.assertEqual(base_url('http://foo.de/bar/baz&x=z&w=y/x/c'), 'http://foo.de/bar/baz&x=z&w=y/x/')

    def test_urljoin(self):
        self.assertEqual(urljoin('http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin(b'http://foo.de/', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin('http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin(b'http://foo.de/', b'/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin('//foo.de/', '/a/b/c.txt'), '//foo.de/a/b/c.txt')
        self.assertEqual(urljoin('http://foo.de/', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin('http://foo.de', '/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin('http://foo.de', 'a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin('http://foo.de/', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin('http://foo.de/', '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt')
        self.assertEqual(urljoin(None, 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin(None, '//foo.de/a/b/c.txt'), '//foo.de/a/b/c.txt')
        self.assertEqual(urljoin('', 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin(['foobar'], 'http://foo.de/a/b/c.txt'), 'http://foo.de/a/b/c.txt')
        self.assertEqual(urljoin('http://foo.de/', None), None)
        self.assertEqual(urljoin('http://foo.de/', ''), None)
        self.assertEqual(urljoin('http://foo.de/', ['foobar']), None)
        self.assertEqual(urljoin('http://foo.de/a/b/c.txt', '.././../d.txt'), 'http://foo.de/d.txt')
        self.assertEqual(urljoin('http://foo.de/a/b/c.txt', 'rtmp://foo.de'), 'rtmp://foo.de')
        self.assertEqual(urljoin(None, 'rtmp://foo.de'), 'rtmp://foo.de')
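
    # url_or_none (below) is a validator rather than a fixer: it accepts only
    # recognizable URL schemes (http(s), ftp(s), the rtmp family, mms, rtsp,
    # and scheme-relative '//...') and returns None for everything else.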
    def test_url_or_none(self):
        self.assertEqual(url_or_none(None), None)
        self.assertEqual(url_or_none(''), None)
        self.assertEqual(url_or_none('foo'), None)
        self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
        self.assertEqual(url_or_none('https://foo.de'), 'https://foo.de')
        self.assertEqual(url_or_none('http$://foo.de'), None)
        self.assertEqual(url_or_none('http://foo.de'), 'http://foo.de')
        self.assertEqual(url_or_none('//foo.de'), '//foo.de')
        self.assertEqual(url_or_none('s3://foo.de'), None)
        self.assertEqual(url_or_none('rtmpte://foo.de'), 'rtmpte://foo.de')
        self.assertEqual(url_or_none('mms://foo.de'), 'mms://foo.de')
        self.assertEqual(url_or_none('rtspu://foo.de'), 'rtspu://foo.de')
        self.assertEqual(url_or_none('ftps://foo.de'), 'ftps://foo.de')

    def test_parse_age_limit(self):
        self.assertEqual(parse_age_limit(None), None)
        self.assertEqual(parse_age_limit(False), None)
        self.assertEqual(parse_age_limit('invalid'), None)
        self.assertEqual(parse_age_limit(0), 0)
        self.assertEqual(parse_age_limit(18), 18)
        self.assertEqual(parse_age_limit(21), 21)
        self.assertEqual(parse_age_limit(22), None)
        self.assertEqual(parse_age_limit('18'), 18)
        self.assertEqual(parse_age_limit('18+'), 18)
        self.assertEqual(parse_age_limit('PG-13'), 13)
        self.assertEqual(parse_age_limit('TV-14'), 14)
        self.assertEqual(parse_age_limit('TV-MA'), 17)
        self.assertEqual(parse_age_limit('TV14'), 14)
        self.assertEqual(parse_age_limit('TV_G'), 0)

    def test_parse_duration(self):
        self.assertEqual(parse_duration(None), None)
        self.assertEqual(parse_duration(False), None)
        self.assertEqual(parse_duration('invalid'), None)
        self.assertEqual(parse_duration('1'), 1)
        self.assertEqual(parse_duration('1337:12'), 80232)
        self.assertEqual(parse_duration('9:12:43'), 33163)
        self.assertEqual(parse_duration('12:00'), 720)
        self.assertEqual(parse_duration('00:01:01'), 61)
        self.assertEqual(parse_duration('x:y'), None)
        self.assertEqual(parse_duration('3h11m53s'), 11513)
        self.assertEqual(parse_duration('3h 11m 53s'), 11513)
        self.assertEqual(parse_duration('3 hours 11 minutes 53 seconds'), 11513)
        self.assertEqual(parse_duration('3 hours 11 mins 53 secs'), 11513)
        self.assertEqual(parse_duration('3 hours, 11 minutes, 53 seconds'), 11513)
        self.assertEqual(parse_duration('3 hours, 11 mins, 53 secs'), 11513)
        self.assertEqual(parse_duration('62m45s'), 3765)
        self.assertEqual(parse_duration('6m59s'), 419)
        self.assertEqual(parse_duration('49s'), 49)
        self.assertEqual(parse_duration('0h0m0s'), 0)
        self.assertEqual(parse_duration('0m0s'), 0)
        self.assertEqual(parse_duration('0s'), 0)
        self.assertEqual(parse_duration('01:02:03.05'), 3723.05)
        self.assertEqual(parse_duration('T30M38S'), 1838)
        self.assertEqual(parse_duration('5 s'), 5)
        self.assertEqual(parse_duration('3 min'), 180)
        self.assertEqual(parse_duration('2.5 hours'), 9000)
        self.assertEqual(parse_duration('02:03:04'), 7384)
        self.assertEqual(parse_duration('01:02:03:04'), 93784)
        self.assertEqual(parse_duration('1 hour 3 minutes'), 3780)
        self.assertEqual(parse_duration('87 Min.'), 5220)
        self.assertEqual(parse_duration('PT1H0.040S'), 3600.04)
        self.assertEqual(parse_duration('PT00H03M30SZ'), 210)
        self.assertEqual(parse_duration('P0Y0M0DT0H4M20.880S'), 260.88)
        self.assertEqual(parse_duration('01:02:03:050'), 3723.05)
        self.assertEqual(parse_duration('103:050'), 103.05)
        self.assertEqual(parse_duration('1HR 3MIN'), 3780)
        self.assertEqual(parse_duration('2hrs 3mins'), 7380)
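
    # fix_xml_ampersands (below) escapes stray '&' characters that are not
    # part of a valid entity reference, while leaving existing '&amp;'/'&#...;'
    # references untouched -- a common fixup for sloppy XML feeds.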
    def test_fix_xml_ampersands(self):
        self.assertEqual(
            fix_xml_ampersands('"&x=y&z=a'), '"&amp;x=y&amp;z=a')
        self.assertEqual(
            fix_xml_ampersands('"&amp;x=y&wrong;&z=a'),
            '"&amp;x=y&amp;wrong;&amp;z=a')
        self.assertEqual(
            fix_xml_ampersands('&amp;&apos;&gt;&lt;&quot;'),
            '&amp;&apos;&gt;&lt;&quot;')
        self.assertEqual(
            fix_xml_ampersands('&#1234;&#x1abC;'), '&#1234;&#x1abC;')
        self.assertEqual(fix_xml_ampersands('&#&#'), '&amp;#&amp;#')

    def test_paged_list(self):
        def testPL(size, pagesize, sliceargs, expected):
            def get_page(pagenum):
                firstid = pagenum * pagesize
                upto = min(size, pagenum * pagesize + pagesize)
                yield from range(firstid, upto)

            pl = OnDemandPagedList(get_page, pagesize)
            got = pl.getslice(*sliceargs)
            self.assertEqual(got, expected)

            iapl = InAdvancePagedList(get_page, size // pagesize + 1, pagesize)
            got = iapl.getslice(*sliceargs)
            self.assertEqual(got, expected)

        testPL(5, 2, (), [0, 1, 2, 3, 4])
        testPL(5, 2, (1,), [1, 2, 3, 4])
        testPL(5, 2, (2,), [2, 3, 4])
        testPL(5, 2, (4,), [4])
        testPL(5, 2, (0, 3), [0, 1, 2])
        testPL(5, 2, (1, 4), [1, 2, 3])
        testPL(5, 2, (2, 99), [2, 3, 4])
        testPL(5, 2, (20, 99), [])

    def test_read_batch_urls(self):
        f = io.StringIO('''\xef\xbb\xbf foo
            bar\r
            baz
            # More after this line\r
            ; or after this
            bam''')
        self.assertEqual(read_batch_urls(f), ['foo', 'bar', 'baz', 'bam'])

    def test_urlencode_postdata(self):
        data = urlencode_postdata({'username': 'foo@bar.com', 'password': '1234'})
        self.assertTrue(isinstance(data, bytes))

    def test_update_url_query(self):
        self.assertEqual(parse_qs(update_url_query(
            'http://example.com/path', {'quality': ['HD'], 'format': ['mp4']})),
            parse_qs('http://example.com/path?quality=HD&format=mp4'))
        self.assertEqual(parse_qs(update_url_query(
            'http://example.com/path', {'system': ['LINUX', 'WINDOWS']})),
            parse_qs('http://example.com/path?system=LINUX&system=WINDOWS'))
        self.assertEqual(parse_qs(update_url_query(
            'http://example.com/path', {'fields': 'id,formats,subtitles'})),
            parse_qs('http://example.com/path?fields=id,formats,subtitles'))
        self.assertEqual(parse_qs(update_url_query(
            'http://example.com/path', {'fields': ('id,formats,subtitles', 'thumbnails')})),
            parse_qs('http://example.com/path?fields=id,formats,subtitles&fields=thumbnails'))
        self.assertEqual(parse_qs(update_url_query(
            'http://example.com/path?manifest=f4m', {'manifest': []})),
            parse_qs('http://example.com/path'))
        self.assertEqual(parse_qs(update_url_query(
            'http://example.com/path?system=LINUX&system=WINDOWS', {'system': 'LINUX'})),
            parse_qs('http://example.com/path?system=LINUX'))
        self.assertEqual(parse_qs(update_url_query(
            'http://example.com/path', {'fields': b'id,formats,subtitles'})),
            parse_qs('http://example.com/path?fields=id,formats,subtitles'))
        self.assertEqual(parse_qs(update_url_query(
            'http://example.com/path', {'width': 1080, 'height': 720})),
            parse_qs('http://example.com/path?width=1080&height=720'))
        self.assertEqual(parse_qs(update_url_query(
            'http://example.com/path', {'bitrate': 5020.43})),
            parse_qs('http://example.com/path?bitrate=5020.43'))
        self.assertEqual(parse_qs(update_url_query(
            'http://example.com/path', {'test': '第二行тест'})),
            parse_qs('http://example.com/path?test=%E7%AC%AC%E4%BA%8C%E8%A1%8C%D1%82%D0%B5%D1%81%D1%82'))

    def test_multipart_encode(self):
        self.assertEqual(
            multipart_encode({b'field': b'value'}, boundary='AAAAAA')[0],
            b'--AAAAAA\r\nContent-Disposition: form-data; name="field"\r\n\r\nvalue\r\n--AAAAAA--\r\n')
        self.assertEqual(
            multipart_encode({'欄位'.encode(): '值'.encode()}, boundary='AAAAAA')[0],
            b'--AAAAAA\r\nContent-Disposition: form-data; name="\xe6\xac\x84\xe4\xbd\x8d"\r\n\r\n\xe5\x80\xbc\r\n--AAAAAA--\r\n')
        self.assertRaises(
            ValueError, multipart_encode, {b'field': b'value'}, boundary='value')
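
    # merge_dicts (below) fills in missing or empty values from later dicts
    # but never overwrites a meaningful value from an earlier one:
    #   merge_dicts({'a': None}, {'a': 1})  # -> {'a': 1}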
b'value'}, boundary='value') def test_merge_dicts(self): self.assertEqual(merge_dicts({'a': 1}, {'b': 2}), {'a': 1, 'b': 2}) self.assertEqual(merge_dicts({'a': 1}, {'a': 2}), {'a': 1}) self.assertEqual(merge_dicts({'a': 1}, {'a': None}), {'a': 1}) self.assertEqual(merge_dicts({'a': 1}, {'a': ''}), {'a': 1}) self.assertEqual(merge_dicts({'a': 1}, {}), {'a': 1}) self.assertEqual(merge_dicts({'a': None}, {'a': 1}), {'a': 1}) self.assertEqual(merge_dicts({'a': ''}, {'a': 1}), {'a': ''}) self.assertEqual(merge_dicts({'a': ''}, {'a': 'abc'}), {'a': 'abc'}) self.assertEqual(merge_dicts({'a': None}, {'a': ''}, {'a': 'abc'}), {'a': 'abc'}) def test_encode_compat_str(self): self.assertEqual(encode_compat_str(b'\xd1\x82\xd0\xb5\xd1\x81\xd1\x82', 'utf-8'), 'тест') self.assertEqual(encode_compat_str('тест', 'utf-8'), 'тест') def test_parse_iso8601(self): self.assertEqual(parse_iso8601('2014-03-23T23:04:26+0100'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00'), 1395641066) self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=dt.timedelta(hours=-7)), 1395641066) self.assertEqual(parse_iso8601('2014-03-23T23:04:26', timezone=NO_DEFAULT), None) # default does not override timezone in date_str self.assertEqual(parse_iso8601('2014-03-23T23:04:26-07:00', timezone=dt.timedelta(hours=-10)), 1395641066) self.assertEqual(parse_iso8601('2014-03-23T22:04:26+0000'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26Z'), 1395612266) self.assertEqual(parse_iso8601('2014-03-23T22:04:26.1234Z'), 1395612266) self.assertEqual(parse_iso8601('2015-09-29T08:27:31.727'), 1443515251) self.assertEqual(parse_iso8601('2015-09-29T08-27-31.727'), None) def test_strip_jsonp(self): stripped = strip_jsonp('cb ([ {"id":"532cb",\n\n\n"x":\n3}\n]\n);') d = json.loads(stripped) self.assertEqual(d, [{'id': '532cb', 'x': 3}]) stripped = strip_jsonp('parseMetadata({"STATUS":"OK"})\n\n\n//epc') d = json.loads(stripped) self.assertEqual(d, {'STATUS': 'OK'}) stripped = strip_jsonp('ps.embedHandler({"status": "success"});') d = json.loads(stripped) self.assertEqual(d, {'status': 'success'}) stripped = strip_jsonp('window.cb && window.cb({"status": "success"});') d = json.loads(stripped) self.assertEqual(d, {'status': 'success'}) stripped = strip_jsonp('window.cb && cb({"status": "success"});') d = json.loads(stripped) self.assertEqual(d, {'status': 'success'}) stripped = strip_jsonp('({"status": "success"});') d = json.loads(stripped) self.assertEqual(d, {'status': 'success'}) def test_strip_or_none(self): self.assertEqual(strip_or_none(' abc'), 'abc') self.assertEqual(strip_or_none('abc '), 'abc') self.assertEqual(strip_or_none(' abc '), 'abc') self.assertEqual(strip_or_none('\tabc\t'), 'abc') self.assertEqual(strip_or_none('\n\tabc\n\t'), 'abc') self.assertEqual(strip_or_none('abc'), 'abc') self.assertEqual(strip_or_none(''), '') self.assertEqual(strip_or_none(None), None) self.assertEqual(strip_or_none(42), None) self.assertEqual(strip_or_none([]), None) def test_uppercase_escape(self): self.assertEqual(uppercase_escape('aä'), 'aä') self.assertEqual(uppercase_escape('\\U0001d550'), '𝕐') def test_lowercase_escape(self): self.assertEqual(lowercase_escape('aä'), 'aä') self.assertEqual(lowercase_escape('\\u0026'), '&') def test_limit_length(self): self.assertEqual(limit_length(None, 12), None) self.assertEqual(limit_length('foo', 12), 'foo') self.assertTrue( limit_length('foo bar baz asd', 12).startswith('foo bar')) self.assertTrue('...' 
in limit_length('foo bar baz asd', 12)) def test_mimetype2ext(self): self.assertEqual(mimetype2ext(None), None) self.assertEqual(mimetype2ext('video/x-flv'), 'flv') self.assertEqual(mimetype2ext('application/x-mpegURL'), 'm3u8') self.assertEqual(mimetype2ext('text/vtt'), 'vtt') self.assertEqual(mimetype2ext('text/vtt;charset=utf-8'), 'vtt') self.assertEqual(mimetype2ext('text/html; charset=utf-8'), 'html') self.assertEqual(mimetype2ext('audio/x-wav'), 'wav') self.assertEqual(mimetype2ext('audio/x-wav;codec=pcm'), 'wav') def test_month_by_name(self): self.assertEqual(month_by_name(None), None) self.assertEqual(month_by_name('December', 'en'), 12) self.assertEqual(month_by_name('décembre', 'fr'), 12) self.assertEqual(month_by_name('December'), 12) self.assertEqual(month_by_name('décembre'), None) self.assertEqual(month_by_name('Unknown', 'unknown'), None) def test_parse_codecs(self): self.assertEqual(parse_codecs(''), {}) self.assertEqual(parse_codecs('avc1.77.30, mp4a.40.2'), { 'vcodec': 'avc1.77.30', 'acodec': 'mp4a.40.2', 'dynamic_range': None, }) self.assertEqual(parse_codecs('mp4a.40.2'), { 'vcodec': 'none', 'acodec': 'mp4a.40.2', 'dynamic_range': None, }) self.assertEqual(parse_codecs('mp4a.40.5,avc1.42001e'), { 'vcodec': 'avc1.42001e', 'acodec': 'mp4a.40.5', 'dynamic_range': None, }) self.assertEqual(parse_codecs('avc3.640028'), { 'vcodec': 'avc3.640028', 'acodec': 'none', 'dynamic_range': None, }) self.assertEqual(parse_codecs(', h264,,newcodec,aac'), { 'vcodec': 'h264', 'acodec': 'aac', 'dynamic_range': None, }) self.assertEqual(parse_codecs('av01.0.05M.08'), { 'vcodec': 'av01.0.05M.08', 'acodec': 'none', 'dynamic_range': None, }) self.assertEqual(parse_codecs('vp9.2'), { 'vcodec': 'vp9.2', 'acodec': 'none', 'dynamic_range': 'HDR10', }) self.assertEqual(parse_codecs('vp09.02.50.10.01.09.18.09.00'), { 'vcodec': 'vp09.02.50.10.01.09.18.09.00', 'acodec': 'none', 'dynamic_range': 'HDR10', }) self.assertEqual(parse_codecs('av01.0.12M.10.0.110.09.16.09.0'), { 'vcodec': 'av01.0.12M.10.0.110.09.16.09.0', 'acodec': 'none', 'dynamic_range': 'HDR10', }) self.assertEqual(parse_codecs('dvhe'), { 'vcodec': 'dvhe', 'acodec': 'none', 'dynamic_range': 'DV', }) self.assertEqual(parse_codecs('fLaC'), { 'vcodec': 'none', 'acodec': 'flac', 'dynamic_range': None, }) self.assertEqual(parse_codecs('theora, vorbis'), { 'vcodec': 'theora', 'acodec': 'vorbis', 'dynamic_range': None, }) self.assertEqual(parse_codecs('unknownvcodec, unknownacodec'), { 'vcodec': 'unknownvcodec', 'acodec': 'unknownacodec', }) self.assertEqual(parse_codecs('unknown'), {}) def test_escape_rfc3986(self): reserved = "!*'();:@&=+$,/?#[]" unreserved = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~' self.assertEqual(escape_rfc3986(reserved), reserved) self.assertEqual(escape_rfc3986(unreserved), unreserved) self.assertEqual(escape_rfc3986('тест'), '%D1%82%D0%B5%D1%81%D1%82') self.assertEqual(escape_rfc3986('%D1%82%D0%B5%D1%81%D1%82'), '%D1%82%D0%B5%D1%81%D1%82') self.assertEqual(escape_rfc3986('foo bar'), 'foo%20bar') self.assertEqual(escape_rfc3986('foo%20bar'), 'foo%20bar') def test_normalize_url(self): self.assertEqual( normalize_url('http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavré_FD.mp4'), 'http://wowza.imust.org/srv/vod/telemb/new/UPLOAD/UPLOAD/20224_IncendieHavre%CC%81_FD.mp4', ) self.assertEqual( normalize_url('http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erklärt/Das-Erste/Video?documentId=22673108&bcastId=5290'), 
'http://www.ardmediathek.de/tv/Sturm-der-Liebe/Folge-2036-Zu-Mann-und-Frau-erkl%C3%A4rt/Das-Erste/Video?documentId=22673108&bcastId=5290', ) self.assertEqual( normalize_url('http://тест.рф/фрагмент'), 'http://xn--e1aybc.xn--p1ai/%D1%84%D1%80%D0%B0%D0%B3%D0%BC%D0%B5%D0%BD%D1%82', ) self.assertEqual( normalize_url('http://тест.рф/абв?абв=абв#абв'), 'http://xn--e1aybc.xn--p1ai/%D0%B0%D0%B1%D0%B2?%D0%B0%D0%B1%D0%B2=%D0%B0%D0%B1%D0%B2#%D0%B0%D0%B1%D0%B2', ) self.assertEqual(normalize_url('http://vimeo.com/56015672#at=0'), 'http://vimeo.com/56015672#at=0') self.assertEqual(normalize_url('http://www.example.com/../a/b/../c/./d.html'), 'http://www.example.com/a/c/d.html') def test_remove_dot_segments(self): self.assertEqual(remove_dot_segments('/a/b/c/./../../g'), '/a/g') self.assertEqual(remove_dot_segments('mid/content=5/../6'), 'mid/6') self.assertEqual(remove_dot_segments('/ad/../cd'), '/cd') self.assertEqual(remove_dot_segments('/ad/../cd/'), '/cd/') self.assertEqual(remove_dot_segments('/..'), '/') self.assertEqual(remove_dot_segments('/./'), '/') self.assertEqual(remove_dot_segments('/./a'), '/a') self.assertEqual(remove_dot_segments('/abc/./.././d/././e/.././f/./../../ghi'), '/ghi') self.assertEqual(remove_dot_segments('/'), '/') self.assertEqual(remove_dot_segments('/t'), '/t') self.assertEqual(remove_dot_segments('t'), 't') self.assertEqual(remove_dot_segments(''), '') self.assertEqual(remove_dot_segments('/../a/b/c'), '/a/b/c') self.assertEqual(remove_dot_segments('../a'), 'a') self.assertEqual(remove_dot_segments('./a'), 'a') self.assertEqual(remove_dot_segments('.'), '') self.assertEqual(remove_dot_segments('////'), '////') def test_js_to_json_vars_strings(self): self.assertDictEqual( json.loads(js_to_json( '''{ 'null': a, 'nullStr': b, 'true': c, 'trueStr': d, 'false': e, 'falseStr': f, 'unresolvedVar': g, }''', { 'a': 'null', 'b': '"null"', 'c': 'true', 'd': '"true"', 'e': 'false', 'f': '"false"', 'g': 'var', }, )), { 'null': None, 'nullStr': 'null', 'true': True, 'trueStr': 'true', 'false': False, 'falseStr': 'false', 'unresolvedVar': 'var', }, ) self.assertDictEqual( json.loads(js_to_json( '''{ 'int': a, 'intStr': b, 'float': c, 'floatStr': d, }''', { 'a': '123', 'b': '"123"', 'c': '1.23', 'd': '"1.23"', }, )), { 'int': 123, 'intStr': '123', 'float': 1.23, 'floatStr': '1.23', }, ) self.assertDictEqual( json.loads(js_to_json( '''{ 'object': a, 'objectStr': b, 'array': c, 'arrayStr': d, }''', { 'a': '{}', 'b': '"{}"', 'c': '[]', 'd': '"[]"', }, )), { 'object': {}, 'objectStr': '{}', 'array': [], 'arrayStr': '[]', }, ) def test_js_to_json_realworld(self): inp = '''{ 'clip':{'provider':'pseudo'} }''' self.assertEqual(js_to_json(inp), '''{ "clip":{"provider":"pseudo"} }''') json.loads(js_to_json(inp)) inp = '''{ 'playlist':[{'controls':{'all':null}}] }''' self.assertEqual(js_to_json(inp), '''{ "playlist":[{"controls":{"all":null}}] }''') inp = '''"The CW\\'s \\'Crazy Ex-Girlfriend\\'"''' self.assertEqual(js_to_json(inp), '''"The CW's 'Crazy Ex-Girlfriend'"''') inp = '"SAND Number: SAND 2013-7800P\\nPresenter: Tom Russo\\nHabanero Software Training - Xyce Software\\nXyce, Sandia\\u0027s"' json_code = js_to_json(inp) self.assertEqual(json.loads(json_code), json.loads(inp)) inp = '''{ 0:{src:'skipped', type: 'application/dash+xml'}, 1:{src:'skipped', type: 'application/vnd.apple.mpegURL'}, }''' self.assertEqual(js_to_json(inp), '''{ "0":{"src":"skipped", "type": "application/dash+xml"}, "1":{"src":"skipped", "type": "application/vnd.apple.mpegURL"} }''') inp = '''{"foo":101}''' 
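        # Added illustrative sketch (not from the original suite): the usual
        # consumer pattern is to pipe js_to_json output straight into
        # json.loads. The object literal below is a hypothetical example that
        # relies only on behaviours pinned elsewhere in this test class
        # (unquoted keys, single-quoted strings, trailing commas).
        js_obj = "{video: {id: 'abc123', formats: ['hls', 'dash'],}}"
        self.assertEqual(
            json.loads(js_to_json(js_obj)),
            {'video': {'id': 'abc123', 'formats': ['hls', 'dash']}})
        # (the realworld checks on `inp` continue below)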
self.assertEqual(js_to_json(inp), '''{"foo":101}''') inp = '''{"duration": "00:01:07"}''' self.assertEqual(js_to_json(inp), '''{"duration": "00:01:07"}''') inp = '''{segments: [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''' self.assertEqual(js_to_json(inp), '''{"segments": [{"offset":-3.885780586188048e-16,"duration":39.75000000000001}]}''') def test_js_to_json_edgecases(self): on = js_to_json("{abc_def:'1\\'\\\\2\\\\\\'3\"4'}") self.assertEqual(json.loads(on), {'abc_def': "1'\\2\\'3\"4"}) on = js_to_json('{"abc": true}') self.assertEqual(json.loads(on), {'abc': True}) # Ignore JavaScript code as well on = js_to_json('''{ "x": 1, y: "a", z: some.code }''') d = json.loads(on) self.assertEqual(d['x'], 1) self.assertEqual(d['y'], 'a') # Just drop ! prefix for now though this results in a wrong value on = js_to_json('''{ a: !0, b: !1, c: !!0, d: !!42.42, e: !!![], f: !"abc", g: !"", !42: 42 }''') self.assertEqual(json.loads(on), { 'a': 0, 'b': 1, 'c': 0, 'd': 42.42, 'e': [], 'f': 'abc', 'g': '', '42': 42, }) on = js_to_json('["abc", "def",]') self.assertEqual(json.loads(on), ['abc', 'def']) on = js_to_json('[/*comment\n*/"abc"/*comment\n*/,/*comment\n*/"def",/*comment\n*/]') self.assertEqual(json.loads(on), ['abc', 'def']) on = js_to_json('[//comment\n"abc" //comment\n,//comment\n"def",//comment\n]') self.assertEqual(json.loads(on), ['abc', 'def']) on = js_to_json('{"abc": "def",}') self.assertEqual(json.loads(on), {'abc': 'def'}) on = js_to_json('{/*comment\n*/"abc"/*comment\n*/:/*comment\n*/"def"/*comment\n*/,/*comment\n*/}') self.assertEqual(json.loads(on), {'abc': 'def'}) on = js_to_json('{ 0: /* " \n */ ",]" , }') self.assertEqual(json.loads(on), {'0': ',]'}) on = js_to_json('{ /*comment\n*/0/*comment\n*/: /* " \n */ ",]" , }') self.assertEqual(json.loads(on), {'0': ',]'}) on = js_to_json('{ 0: // comment\n1 }') self.assertEqual(json.loads(on), {'0': 1}) on = js_to_json(r'["<p>x<\/p>"]') self.assertEqual(json.loads(on), ['<p>x</p>']) on = js_to_json(r'["\xaa"]') self.assertEqual(json.loads(on), ['\u00aa']) on = js_to_json("['a\\\nb']") self.assertEqual(json.loads(on), ['ab']) on = js_to_json("/*comment\n*/[/*comment\n*/'a\\\nb'/*comment\n*/]/*comment\n*/") self.assertEqual(json.loads(on), ['ab']) on = js_to_json('{0xff:0xff}') self.assertEqual(json.loads(on), {'255': 255}) on = js_to_json('{/*comment\n*/0xff/*comment\n*/:/*comment\n*/0xff/*comment\n*/}') self.assertEqual(json.loads(on), {'255': 255}) on = js_to_json('{077:077}') self.assertEqual(json.loads(on), {'63': 63}) on = js_to_json('{/*comment\n*/077/*comment\n*/:/*comment\n*/077/*comment\n*/}') self.assertEqual(json.loads(on), {'63': 63}) on = js_to_json('{42:42}') self.assertEqual(json.loads(on), {'42': 42}) on = js_to_json('{/*comment\n*/42/*comment\n*/:/*comment\n*/42/*comment\n*/}') self.assertEqual(json.loads(on), {'42': 42}) on = js_to_json('{42:4.2e1}') self.assertEqual(json.loads(on), {'42': 42.0}) on = js_to_json('{ "0x40": "0x40" }') self.assertEqual(json.loads(on), {'0x40': '0x40'}) on = js_to_json('{ "040": "040" }') self.assertEqual(json.loads(on), {'040': '040'}) on = js_to_json('[1,//{},\n2]') self.assertEqual(json.loads(on), [1, 2]) on = js_to_json(R'"\^\$\#"') self.assertEqual(json.loads(on), R'^$#', msg='Unnecessary escapes should be stripped') on = js_to_json('\'"\\""\'') self.assertEqual(json.loads(on), '"""', msg='Unnecessary quote escape should be escaped') on = js_to_json('[new Date("spam"), \'("eggs")\']') self.assertEqual(json.loads(on), ['spam', '("eggs")'], msg='Date regex should 
match a single string')

    def test_js_to_json_malformed(self):
        self.assertEqual(js_to_json('42a1'), '42"a1"')
        self.assertEqual(js_to_json('42a-1'), '42"a"-1')

    def test_js_to_json_template_literal(self):
        self.assertEqual(js_to_json('`Hello ${name}`', {'name': '"world"'}), '"Hello world"')
        self.assertEqual(js_to_json('`${name}${name}`', {'name': '"X"'}), '"XX"')
        self.assertEqual(js_to_json('`${name}${name}`', {'name': '5'}), '"55"')
        self.assertEqual(js_to_json('`${name}"${name}"`', {'name': '5'}), '"5\\"5\\""')
        self.assertEqual(js_to_json('`${name}`', {}), '"name"')

    def test_js_to_json_common_constructors(self):
        self.assertEqual(json.loads(js_to_json('new Map([["a", 5]])')), {'a': 5})
        self.assertEqual(json.loads(js_to_json('Array(5, 10)')), [5, 10])
        self.assertEqual(json.loads(js_to_json('new Array(15,5)')), [15, 5])
        self.assertEqual(json.loads(js_to_json('new Map([Array(5, 10),new Array(15,5)])')), {'5': 10, '15': 5})
        self.assertEqual(json.loads(js_to_json('new Date("123")')), '123')
        self.assertEqual(json.loads(js_to_json('new Date(\'2023-10-19\')')), '2023-10-19')

    def test_extract_attributes(self):
        self.assertEqual(extract_attributes('<e x="y">'), {'x': 'y'})
        self.assertEqual(extract_attributes("<e x='y'>"), {'x': 'y'})
        self.assertEqual(extract_attributes('<e x=y>'), {'x': 'y'})
        self.assertEqual(extract_attributes('<e x="a \'b\' c">'), {'x': "a 'b' c"})
        self.assertEqual(extract_attributes('<e x=\'a "b" c\'>'), {'x': 'a "b" c'})
        self.assertEqual(extract_attributes('<e x="&#121;">'), {'x': 'y'})
        self.assertEqual(extract_attributes('<e x="&#x79;">'), {'x': 'y'})
        self.assertEqual(extract_attributes('<e x="&amp;">'), {'x': '&'})  # XML
        self.assertEqual(extract_attributes('<e x="&quot;">'), {'x': '"'})
        self.assertEqual(extract_attributes('<e x="&pound;">'), {'x': '£'})  # HTML 3.2
        self.assertEqual(extract_attributes('<e x="&lambda;">'), {'x': 'λ'})  # HTML 4.0
        self.assertEqual(extract_attributes('<e x="&foo">'), {'x': '&foo'})
        self.assertEqual(extract_attributes('<e x="&#39;">'), {'x': "'"})
        self.assertEqual(extract_attributes('<e x=\'"\'>'), {'x': '"'})
        self.assertEqual(extract_attributes('<e x >'), {'x': None})
        self.assertEqual(extract_attributes('<e x=y a>'), {'x': 'y', 'a': None})
        self.assertEqual(extract_attributes('<e x= y>'), {'x': 'y'})
        self.assertEqual(extract_attributes('<e x=1 y=2 x=3>'), {'y': '2', 'x': '3'})
        self.assertEqual(extract_attributes('<e \nx=\ny\n>'), {'x': 'y'})
        self.assertEqual(extract_attributes('<e \nx=\n"y"\n>'), {'x': 'y'})
        self.assertEqual(extract_attributes("<e \nx=\n'y'\n>"), {'x': 'y'})
        self.assertEqual(extract_attributes('<e \nx="\ny\n">'), {'x': '\ny\n'})
        self.assertEqual(extract_attributes('<e CAPS=x>'), {'caps': 'x'})  # Names lowercased
        self.assertEqual(extract_attributes('<e x=1 X=2>'), {'x': '2'})
        self.assertEqual(extract_attributes('<e X=1 x=2>'), {'x': '2'})
        self.assertEqual(extract_attributes('<e _:funny-name1=1>'), {'_:funny-name1': '1'})
        self.assertEqual(extract_attributes('<e x="Fáilte 世界 \U0001f600">'), {'x': 'Fáilte 世界 \U0001f600'})
        self.assertEqual(extract_attributes('<e x="décompose&#769;">'), {'x': 'décompose\u0301'})
        # "Narrow" Python builds don't support unicode code points outside BMP.
try: chr(0x10000) supports_outside_bmp = True except ValueError: supports_outside_bmp = False if supports_outside_bmp: self.assertEqual(extract_attributes('<e x="Smile 😀!">'), {'x': 'Smile \U0001f600!'}) # Malformed HTML should not break attributes extraction on older Python self.assertEqual(extract_attributes('<mal"formed/>'), {}) def test_clean_html(self): self.assertEqual(clean_html('a:\nb'), 'a: b') self.assertEqual(clean_html('a:\n "b"'), 'a: "b"') self.assertEqual(clean_html('a<br>\xa0b'), 'a\nb') def test_intlist_to_bytes(self): self.assertEqual( intlist_to_bytes([0, 1, 127, 128, 255]), b'\x00\x01\x7f\x80\xff') def test_args_to_str(self): self.assertEqual( args_to_str(['foo', 'ba/r', '-baz', '2 be', '']), 'foo ba/r -baz \'2 be\' \'\'' if compat_os_name != 'nt' else 'foo ba/r -baz "2 be" ""', ) def test_parse_filesize(self): self.assertEqual(parse_filesize(None), None) self.assertEqual(parse_filesize(''), None) self.assertEqual(parse_filesize('91 B'), 91) self.assertEqual(parse_filesize('foobar'), None) self.assertEqual(parse_filesize('2 MiB'), 2097152) self.assertEqual(parse_filesize('5 GB'), 5000000000) self.assertEqual(parse_filesize('1.2Tb'), 1200000000000) self.assertEqual(parse_filesize('1.2tb'), 1200000000000) self.assertEqual(parse_filesize('1,24 KB'), 1240) self.assertEqual(parse_filesize('1,24 kb'), 1240) self.assertEqual(parse_filesize('8.5 megabytes'), 8500000) def test_parse_count(self): self.assertEqual(parse_count(None), None) self.assertEqual(parse_count(''), None) self.assertEqual(parse_count('0'), 0) self.assertEqual(parse_count('1000'), 1000) self.assertEqual(parse_count('1.000'), 1000) self.assertEqual(parse_count('1.1k'), 1100) self.assertEqual(parse_count('1.1 k'), 1100) self.assertEqual(parse_count('1,1 k'), 1100) self.assertEqual(parse_count('1.1kk'), 1100000) self.assertEqual(parse_count('1.1kk '), 1100000) self.assertEqual(parse_count('1,1kk'), 1100000) self.assertEqual(parse_count('100 views'), 100) self.assertEqual(parse_count('1,100 views'), 1100) self.assertEqual(parse_count('1.1kk views'), 1100000) self.assertEqual(parse_count('10M views'), 10000000) self.assertEqual(parse_count('has 10M views'), 10000000) def test_parse_resolution(self): self.assertEqual(parse_resolution(None), {}) self.assertEqual(parse_resolution(''), {}) self.assertEqual(parse_resolution(' 1920x1080'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('1920×1080 '), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('1920 x 1080'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('720p'), {'height': 720}) self.assertEqual(parse_resolution('4k'), {'height': 2160}) self.assertEqual(parse_resolution('8K'), {'height': 4320}) self.assertEqual(parse_resolution('pre_1920x1080_post'), {'width': 1920, 'height': 1080}) self.assertEqual(parse_resolution('ep1x2'), {}) self.assertEqual(parse_resolution('1920, 1080'), {'width': 1920, 'height': 1080}) def test_parse_bitrate(self): self.assertEqual(parse_bitrate(None), None) self.assertEqual(parse_bitrate(''), None) self.assertEqual(parse_bitrate('300kbps'), 300) self.assertEqual(parse_bitrate('1500kbps'), 1500) self.assertEqual(parse_bitrate('300 kbps'), 300) def test_version_tuple(self): self.assertEqual(version_tuple('1'), (1,)) self.assertEqual(version_tuple('10.23.344'), (10, 23, 344)) self.assertEqual(version_tuple('10.1-6'), (10, 1, 6)) # avconv style def test_detect_exe_version(self): self.assertEqual(detect_exe_version('''ffmpeg version 1.2.1 built on May 27 2013 08:37:26 with gcc 4.7 
(Debian 4.7.3-4) configuration: --prefix=/usr --extra-'''), '1.2.1') self.assertEqual(detect_exe_version('''ffmpeg version N-63176-g1fb4685 built on May 15 2014 22:09:06 with gcc 4.8.2 (GCC)'''), 'N-63176-g1fb4685') self.assertEqual(detect_exe_version('''X server found. dri2 connection failed! Trying to open render node... Success at /dev/dri/renderD128. ffmpeg version 2.4.4 Copyright (c) 2000-2014 the FFmpeg ...'''), '2.4.4') def test_age_restricted(self): self.assertFalse(age_restricted(None, 10)) # unrestricted content self.assertFalse(age_restricted(1, None)) # unrestricted policy self.assertFalse(age_restricted(8, 10)) self.assertTrue(age_restricted(18, 14)) self.assertFalse(age_restricted(18, 18)) def test_is_html(self): self.assertFalse(is_html(b'\x49\x44\x43<html')) self.assertTrue(is_html(b'<!DOCTYPE foo>\xaaa')) self.assertTrue(is_html( # UTF-8 with BOM b'\xef\xbb\xbf<!DOCTYPE foo>\xaaa')) self.assertTrue(is_html( # UTF-16-LE b'\xff\xfe<\x00h\x00t\x00m\x00l\x00>\x00\xe4\x00', )) self.assertTrue(is_html( # UTF-16-BE b'\xfe\xff\x00<\x00h\x00t\x00m\x00l\x00>\x00\xe4', )) self.assertTrue(is_html( # UTF-32-BE b'\x00\x00\xFE\xFF\x00\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4')) self.assertTrue(is_html( # UTF-32-LE b'\xFF\xFE\x00\x00<\x00\x00\x00h\x00\x00\x00t\x00\x00\x00m\x00\x00\x00l\x00\x00\x00>\x00\x00\x00\xe4\x00\x00\x00')) def test_render_table(self): self.assertEqual( render_table( ['a', 'empty', 'bcd'], [[123, '', 4], [9999, '', 51]]), 'a empty bcd\n' '123 4\n' '9999 51') self.assertEqual( render_table( ['a', 'empty', 'bcd'], [[123, '', 4], [9999, '', 51]], hide_empty=True), 'a bcd\n' '123 4\n' '9999 51') self.assertEqual( render_table( ['\ta', 'bcd'], [['1\t23', 4], ['\t9999', 51]]), ' a bcd\n' '1 23 4\n' '9999 51') self.assertEqual( render_table( ['a', 'bcd'], [[123, 4], [9999, 51]], delim='-'), 'a bcd\n' '--------\n' '123 4\n' '9999 51') self.assertEqual( render_table( ['a', 'bcd'], [[123, 4], [9999, 51]], delim='-', extra_gap=2), 'a bcd\n' '----------\n' '123 4\n' '9999 51') def test_match_str(self): # Unary self.assertFalse(match_str('xy', {'x': 1200})) self.assertTrue(match_str('!xy', {'x': 1200})) self.assertTrue(match_str('x', {'x': 1200})) self.assertFalse(match_str('!x', {'x': 1200})) self.assertTrue(match_str('x', {'x': 0})) self.assertTrue(match_str('is_live', {'is_live': True})) self.assertFalse(match_str('is_live', {'is_live': False})) self.assertFalse(match_str('is_live', {'is_live': None})) self.assertFalse(match_str('is_live', {})) self.assertFalse(match_str('!is_live', {'is_live': True})) self.assertTrue(match_str('!is_live', {'is_live': False})) self.assertTrue(match_str('!is_live', {'is_live': None})) self.assertTrue(match_str('!is_live', {})) self.assertTrue(match_str('title', {'title': 'abc'})) self.assertTrue(match_str('title', {'title': ''})) self.assertFalse(match_str('!title', {'title': 'abc'})) self.assertFalse(match_str('!title', {'title': ''})) # Numeric self.assertFalse(match_str('x>0', {'x': 0})) self.assertFalse(match_str('x>0', {})) self.assertTrue(match_str('x>?0', {})) self.assertTrue(match_str('x>1K', {'x': 1200})) self.assertFalse(match_str('x>2K', {'x': 1200})) self.assertTrue(match_str('x>=1200 & x < 1300', {'x': 1200})) self.assertFalse(match_str('x>=1100 & x < 1200', {'x': 1200})) self.assertTrue(match_str('x > 1:0:0', {'x': 3700})) # String self.assertFalse(match_str('y=a212', {'y': 'foobar42'})) self.assertTrue(match_str('y=foobar42', {'y': 'foobar42'})) 
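        # Added illustrative sketch (not from the original suite): the unary,
        # numeric and string operators above compose with '&', which is how
        # --match-filters expressions are written in practice. Uses only
        # operators already exercised in this test:
        self.assertTrue(match_str('x>1K & y=foobar42 & !z', {'x': 1200, 'y': 'foobar42'}))
        self.assertFalse(match_str('x>2K & y=foobar42', {'x': 1200, 'y': 'foobar42'}))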
self.assertFalse(match_str('y!=foobar42', {'y': 'foobar42'})) self.assertTrue(match_str('y!=foobar2', {'y': 'foobar42'})) self.assertTrue(match_str('y^=foo', {'y': 'foobar42'})) self.assertFalse(match_str('y!^=foo', {'y': 'foobar42'})) self.assertFalse(match_str('y^=bar', {'y': 'foobar42'})) self.assertTrue(match_str('y!^=bar', {'y': 'foobar42'})) self.assertRaises(ValueError, match_str, 'x^=42', {'x': 42}) self.assertTrue(match_str('y*=bar', {'y': 'foobar42'})) self.assertFalse(match_str('y!*=bar', {'y': 'foobar42'})) self.assertFalse(match_str('y*=baz', {'y': 'foobar42'})) self.assertTrue(match_str('y!*=baz', {'y': 'foobar42'})) self.assertTrue(match_str('y$=42', {'y': 'foobar42'})) self.assertFalse(match_str('y$=43', {'y': 'foobar42'})) # And self.assertFalse(match_str( 'like_count > 100 & dislike_count <? 50 & description', {'like_count': 90, 'description': 'foo'})) self.assertTrue(match_str( 'like_count > 100 & dislike_count <? 50 & description', {'like_count': 190, 'description': 'foo'})) self.assertFalse(match_str( 'like_count > 100 & dislike_count <? 50 & description', {'like_count': 190, 'dislike_count': 60, 'description': 'foo'})) self.assertFalse(match_str( 'like_count > 100 & dislike_count <? 50 & description', {'like_count': 190, 'dislike_count': 10})) # Regex self.assertTrue(match_str(r'x~=\bbar', {'x': 'foo bar'})) self.assertFalse(match_str(r'x~=\bbar.+', {'x': 'foo bar'})) self.assertFalse(match_str(r'x~=^FOO', {'x': 'foo bar'})) self.assertTrue(match_str(r'x~=(?i)^FOO', {'x': 'foo bar'})) # Quotes self.assertTrue(match_str(r'x^="foo"', {'x': 'foo "bar"'})) self.assertFalse(match_str(r'x^="foo "', {'x': 'foo "bar"'})) self.assertFalse(match_str(r'x$="bar"', {'x': 'foo "bar"'})) self.assertTrue(match_str(r'x$=" \"bar\""', {'x': 'foo "bar"'})) # Escaping & self.assertFalse(match_str(r'x=foo & bar', {'x': 'foo & bar'})) self.assertTrue(match_str(r'x=foo \& bar', {'x': 'foo & bar'})) self.assertTrue(match_str(r'x=foo \& bar & x^=foo', {'x': 'foo & bar'})) self.assertTrue(match_str(r'x="foo \& bar" & x^=foo', {'x': 'foo & bar'})) # Example from docs self.assertTrue(match_str( r"!is_live & like_count>?100 & description~='(?i)\bcats \& dogs\b'", {'description': 'Raining Cats & Dogs'})) # Incomplete self.assertFalse(match_str('id!=foo', {'id': 'foo'}, True)) self.assertTrue(match_str('x', {'id': 'foo'}, True)) self.assertTrue(match_str('!x', {'id': 'foo'}, True)) self.assertFalse(match_str('x', {'id': 'foo'}, False)) def test_parse_dfxp_time_expr(self): self.assertEqual(parse_dfxp_time_expr(None), None) self.assertEqual(parse_dfxp_time_expr(''), None) self.assertEqual(parse_dfxp_time_expr('0.1'), 0.1) self.assertEqual(parse_dfxp_time_expr('0.1s'), 0.1) self.assertEqual(parse_dfxp_time_expr('00:00:01'), 1.0) self.assertEqual(parse_dfxp_time_expr('00:00:01.100'), 1.1) self.assertEqual(parse_dfxp_time_expr('00:00:01:100'), 1.1) def test_dfxp2srt(self): dfxp_data = '''<?xml version="1.0" encoding="UTF-8"?> <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> <body> <div xml:lang="en"> <p begin="0" end="1">The following line contains Chinese characters and special symbols</p> <p begin="1" end="2">第二行<br/>♪♪</p> <p begin="2" dur="1"><span>Third<br/>Line</span></p> <p begin="3" end="-1">Lines with invalid timestamps are ignored</p> <p begin="-1" end="-1">Ignore, two</p> <p begin="3" dur="-1">Ignored, three</p> </div> </body> </tt>'''.encode() srt_data = '''1 00:00:00,000 --> 00:00:01,000 The following line contains Chinese characters and 
special symbols 2 00:00:01,000 --> 00:00:02,000 第二行 ♪♪ 3 00:00:02,000 --> 00:00:03,000 Third Line ''' self.assertEqual(dfxp2srt(dfxp_data), srt_data) dfxp_data_no_default_namespace = b'''<?xml version="1.0" encoding="UTF-8"?> <tt xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> <body> <div xml:lang="en"> <p begin="0" end="1">The first line</p> </div> </body> </tt>''' srt_data = '''1 00:00:00,000 --> 00:00:01,000 The first line ''' self.assertEqual(dfxp2srt(dfxp_data_no_default_namespace), srt_data) dfxp_data_with_style = b'''<?xml version="1.0" encoding="utf-8"?> <tt xmlns="http://www.w3.org/2006/10/ttaf1" xmlns:ttp="http://www.w3.org/2006/10/ttaf1#parameter" ttp:timeBase="media" xmlns:tts="http://www.w3.org/2006/10/ttaf1#style" xml:lang="en" xmlns:ttm="http://www.w3.org/2006/10/ttaf1#metadata"> <head> <styling> <style id="s2" style="s0" tts:color="cyan" tts:fontWeight="bold" /> <style id="s1" style="s0" tts:color="yellow" tts:fontStyle="italic" /> <style id="s3" style="s0" tts:color="lime" tts:textDecoration="underline" /> <style id="s0" tts:backgroundColor="black" tts:fontStyle="normal" tts:fontSize="16" tts:fontFamily="sansSerif" tts:color="white" /> </styling> </head> <body tts:textAlign="center" style="s0"> <div> <p begin="00:00:02.08" id="p0" end="00:00:05.84">default style<span tts:color="red">custom style</span></p> <p style="s2" begin="00:00:02.08" id="p0" end="00:00:05.84"><span tts:color="lime">part 1<br /></span><span tts:color="cyan">part 2</span></p> <p style="s3" begin="00:00:05.84" id="p1" end="00:00:09.56">line 3<br />part 3</p> <p style="s1" tts:textDecoration="underline" begin="00:00:09.56" id="p2" end="00:00:12.36"><span style="s2" tts:color="lime">inner<br /> </span>style</p> </div> </body> </tt>''' srt_data = '''1 00:00:02,080 --> 00:00:05,840 <font color="white" face="sansSerif" size="16">default style<font color="red">custom style</font></font> 2 00:00:02,080 --> 00:00:05,840 <b><font color="cyan" face="sansSerif" size="16"><font color="lime">part 1 </font>part 2</font></b> 3 00:00:05,840 --> 00:00:09,560 <u><font color="lime">line 3 part 3</font></u> 4 00:00:09,560 --> 00:00:12,360 <i><u><font color="yellow"><font color="lime">inner </font>style</font></u></i> ''' self.assertEqual(dfxp2srt(dfxp_data_with_style), srt_data) dfxp_data_non_utf8 = '''<?xml version="1.0" encoding="UTF-16"?> <tt xmlns="http://www.w3.org/ns/ttml" xml:lang="en" xmlns:tts="http://www.w3.org/ns/ttml#parameter"> <body> <div xml:lang="en"> <p begin="0" end="1">Line 1</p> <p begin="1" end="2">第二行</p> </div> </body> </tt>'''.encode('utf-16') srt_data = '''1 00:00:00,000 --> 00:00:01,000 Line 1 2 00:00:01,000 --> 00:00:02,000 第二行 ''' self.assertEqual(dfxp2srt(dfxp_data_non_utf8), srt_data) def test_cli_option(self): self.assertEqual(cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy'), ['--proxy', '127.0.0.1:3128']) self.assertEqual(cli_option({'proxy': None}, '--proxy', 'proxy'), []) self.assertEqual(cli_option({}, '--proxy', 'proxy'), []) self.assertEqual(cli_option({'retries': 10}, '--retries', 'retries'), ['--retries', '10']) def test_cli_valueless_option(self): self.assertEqual(cli_valueless_option( {'downloader': 'external'}, '--external-downloader', 'downloader', 'external'), ['--external-downloader']) self.assertEqual(cli_valueless_option( {'downloader': 'internal'}, '--external-downloader', 'downloader', 'external'), []) self.assertEqual(cli_valueless_option( {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), ['--no-check-certificate']) 
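        # Added illustrative sketch (not from the original suite): these cli_*
        # helpers exist so a params dict can be translated into an external
        # downloader command line; their outputs are plain lists and simply
        # concatenate. The values reuse cases already asserted in these tests:
        self.assertEqual(
            cli_option({'proxy': '127.0.0.1:3128'}, '--proxy', 'proxy')
            + cli_valueless_option({'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'),
            ['--proxy', '127.0.0.1:3128', '--no-check-certificate'])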
self.assertEqual(cli_valueless_option( {'nocheckcertificate': False}, '--no-check-certificate', 'nocheckcertificate'), []) self.assertEqual(cli_valueless_option( {'checkcertificate': True}, '--no-check-certificate', 'checkcertificate', False), []) self.assertEqual(cli_valueless_option( {'checkcertificate': False}, '--no-check-certificate', 'checkcertificate', False), ['--no-check-certificate']) def test_cli_bool_option(self): self.assertEqual( cli_bool_option( {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate'), ['--no-check-certificate', 'true']) self.assertEqual( cli_bool_option( {'nocheckcertificate': True}, '--no-check-certificate', 'nocheckcertificate', separator='='), ['--no-check-certificate=true']) self.assertEqual( cli_bool_option( {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true'), ['--check-certificate', 'false']) self.assertEqual( cli_bool_option( {'nocheckcertificate': True}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), ['--check-certificate=false']) self.assertEqual( cli_bool_option( {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true'), ['--check-certificate', 'true']) self.assertEqual( cli_bool_option( {'nocheckcertificate': False}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), ['--check-certificate=true']) self.assertEqual( cli_bool_option( {}, '--check-certificate', 'nocheckcertificate', 'false', 'true', '='), []) def test_ohdave_rsa_encrypt(self): N = 0xab86b6371b5318aaa1d3c9e612a9f1264f372323c8c0f19875b5fc3b3fd3afcc1e5bec527aa94bfa85bffc157e4245aebda05389a5357b75115ac94f074aefcd e = 65537 self.assertEqual( ohdave_rsa_encrypt(b'aa111222', e, N), '726664bd9a23fd0c70f9f1b84aab5e3905ce1e45a584e9cbcf9bcc7510338fc1986d6c599ff990d923aa43c51c0d9013cd572e13bc58f4ae48f2ed8c0b0ba881') def test_pkcs1pad(self): data = [1, 2, 3] padded_data = pkcs1pad(data, 32) self.assertEqual(padded_data[:2], [0, 2]) self.assertEqual(padded_data[28:], [0, 1, 2, 3]) self.assertRaises(ValueError, pkcs1pad, data, 8) def test_encode_base_n(self): self.assertEqual(encode_base_n(0, 30), '0') self.assertEqual(encode_base_n(80, 30), '2k') custom_table = '9876543210ZYXWVUTSRQPONMLKJIHGFEDCBA' self.assertEqual(encode_base_n(0, 30, custom_table), '9') self.assertEqual(encode_base_n(80, 30, custom_table), '7P') self.assertRaises(ValueError, encode_base_n, 0, 70) self.assertRaises(ValueError, encode_base_n, 0, 60, custom_table) def test_caesar(self): self.assertEqual(caesar('ace', 'abcdef', 2), 'cea') self.assertEqual(caesar('cea', 'abcdef', -2), 'ace') self.assertEqual(caesar('ace', 'abcdef', -2), 'eac') self.assertEqual(caesar('eac', 'abcdef', 2), 'ace') self.assertEqual(caesar('ace', 'abcdef', 0), 'ace') self.assertEqual(caesar('xyz', 'abcdef', 2), 'xyz') self.assertEqual(caesar('abc', 'acegik', 2), 'ebg') self.assertEqual(caesar('ebg', 'acegik', -2), 'abc') def test_rot47(self): self.assertEqual(rot47('yt-dlp'), r'JE\5=A') self.assertEqual(rot47('YT-DLP'), r'*%\s{!') def test_urshift(self): self.assertEqual(urshift(3, 1), 1) self.assertEqual(urshift(-3, 1), 2147483646) GET_ELEMENT_BY_CLASS_TEST_STRING = ''' <span class="foo bar">nice</span> ''' def test_get_element_by_class(self): html = self.GET_ELEMENT_BY_CLASS_TEST_STRING self.assertEqual(get_element_by_class('foo', html), 'nice') self.assertEqual(get_element_by_class('no-such-class', html), None) def test_get_element_html_by_class(self): html = self.GET_ELEMENT_BY_CLASS_TEST_STRING 
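        # Added note: unlike get_element_by_class, which returns only the
        # element's inner text ('nice'), the *_html_* variant checked below
        # returns the element's full markup including its tags.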
self.assertEqual(get_element_html_by_class('foo', html), html.strip()) self.assertEqual(get_element_by_class('no-such-class', html), None) GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING = ''' <div itemprop="author" itemscope>foo</div> ''' def test_get_element_by_attribute(self): html = self.GET_ELEMENT_BY_CLASS_TEST_STRING self.assertEqual(get_element_by_attribute('class', 'foo bar', html), 'nice') self.assertEqual(get_element_by_attribute('class', 'foo', html), None) self.assertEqual(get_element_by_attribute('class', 'no-such-foo', html), None) html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING self.assertEqual(get_element_by_attribute('itemprop', 'author', html), 'foo') def test_get_element_html_by_attribute(self): html = self.GET_ELEMENT_BY_CLASS_TEST_STRING self.assertEqual(get_element_html_by_attribute('class', 'foo bar', html), html.strip()) self.assertEqual(get_element_html_by_attribute('class', 'foo', html), None) self.assertEqual(get_element_html_by_attribute('class', 'no-such-foo', html), None) html = self.GET_ELEMENT_BY_ATTRIBUTE_TEST_STRING self.assertEqual(get_element_html_by_attribute('itemprop', 'author', html), html.strip()) GET_ELEMENTS_BY_CLASS_TEST_STRING = ''' <span class="foo bar">nice</span><span class="foo bar">also nice</span> ''' GET_ELEMENTS_BY_CLASS_RES = ['<span class="foo bar">nice</span>', '<span class="foo bar">also nice</span>'] def test_get_elements_by_class(self): html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING self.assertEqual(get_elements_by_class('foo', html), ['nice', 'also nice']) self.assertEqual(get_elements_by_class('no-such-class', html), []) def test_get_elements_html_by_class(self): html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING self.assertEqual(get_elements_html_by_class('foo', html), self.GET_ELEMENTS_BY_CLASS_RES) self.assertEqual(get_elements_html_by_class('no-such-class', html), []) def test_get_elements_by_attribute(self): html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING self.assertEqual(get_elements_by_attribute('class', 'foo bar', html), ['nice', 'also nice']) self.assertEqual(get_elements_by_attribute('class', 'foo', html), []) self.assertEqual(get_elements_by_attribute('class', 'no-such-foo', html), []) def test_get_elements_html_by_attribute(self): html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING self.assertEqual(get_elements_html_by_attribute('class', 'foo bar', html), self.GET_ELEMENTS_BY_CLASS_RES) self.assertEqual(get_elements_html_by_attribute('class', 'foo', html), []) self.assertEqual(get_elements_html_by_attribute('class', 'no-such-foo', html), []) def test_get_elements_text_and_html_by_attribute(self): html = self.GET_ELEMENTS_BY_CLASS_TEST_STRING self.assertEqual( list(get_elements_text_and_html_by_attribute('class', 'foo bar', html)), list(zip(['nice', 'also nice'], self.GET_ELEMENTS_BY_CLASS_RES))) self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'foo', html)), []) self.assertEqual(list(get_elements_text_and_html_by_attribute('class', 'no-such-foo', html)), []) self.assertEqual(list(get_elements_text_and_html_by_attribute( 'class', 'foo', '<a class="foo">nice</a><span class="foo">nice</span>', tag='a')), [('nice', '<a class="foo">nice</a>')]) GET_ELEMENT_BY_TAG_TEST_STRING = ''' random text lorem ipsum</p> <div> this should be returned <span>this should also be returned</span> <div> this should also be returned </div> closing tag above should not trick, so this should also be returned </div> but this text should not be returned ''' GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[32:276] 
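    # Added note: these RES_* constants slice the outer <div> and inner <span>
    # out of GET_ELEMENT_BY_TAG_TEST_STRING by hard-coded character offsets,
    # so any whitespace edit to the test string above silently invalidates them.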
GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT = GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML[5:-6] GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML = GET_ELEMENT_BY_TAG_TEST_STRING.strip()[78:119] GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT = GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML[6:-7] def test_get_element_text_and_html_by_tag(self): html = self.GET_ELEMENT_BY_TAG_TEST_STRING self.assertEqual( get_element_text_and_html_by_tag('div', html), (self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_TEXT, self.GET_ELEMENT_BY_TAG_RES_OUTERDIV_HTML)) self.assertEqual( get_element_text_and_html_by_tag('span', html), (self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_TEXT, self.GET_ELEMENT_BY_TAG_RES_INNERSPAN_HTML)) self.assertRaises(compat_HTMLParseError, get_element_text_and_html_by_tag, 'article', html) def test_iri_to_uri(self): self.assertEqual( iri_to_uri('https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b'), 'https://www.google.com/search?q=foo&ie=utf-8&oe=utf-8&client=firefox-b') # Same self.assertEqual( iri_to_uri('https://www.google.com/search?q=Käsesoßenrührlöffel'), # German for cheese sauce stirring spoon 'https://www.google.com/search?q=K%C3%A4seso%C3%9Fenr%C3%BChrl%C3%B6ffel') self.assertEqual( iri_to_uri('https://www.google.com/search?q=lt<+gt>+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#'), 'https://www.google.com/search?q=lt%3C+gt%3E+eq%3D+amp%26+percent%25+hash%23+colon%3A+tilde~#trash=?&garbage=#') self.assertEqual( iri_to_uri('http://правозащита38.рф/category/news/'), 'http://xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/') self.assertEqual( iri_to_uri('http://www.правозащита38.рф/category/news/'), 'http://www.xn--38-6kcaak9aj5chl4a3g.xn--p1ai/category/news/') self.assertEqual( iri_to_uri('https://i❤.ws/emojidomain/👍👏🤝💪'), 'https://xn--i-7iq.ws/emojidomain/%F0%9F%91%8D%F0%9F%91%8F%F0%9F%A4%9D%F0%9F%92%AA') self.assertEqual( iri_to_uri('http://日本語.jp/'), 'http://xn--wgv71a119e.jp/') self.assertEqual( iri_to_uri('http://导航.中国/'), 'http://xn--fet810g.xn--fiqs8s/') def test_clean_podcast_url(self): self.assertEqual(clean_podcast_url('https://www.podtrac.com/pts/redirect.mp3/chtbl.com/track/5899E/traffic.megaphone.fm/HSW7835899191.mp3'), 'https://traffic.megaphone.fm/HSW7835899191.mp3') self.assertEqual(clean_podcast_url('https://play.podtrac.com/npr-344098539/edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3'), 'https://edge1.pod.npr.org/anon.npr-podcasts/podcast/npr/waitwait/2020/10/20201003_waitwait_wwdtmpodcast201003-015621a5-f035-4eca-a9a1-7c118d90bc3c.mp3') self.assertEqual(clean_podcast_url('https://pdst.fm/e/2.gum.fm/chtbl.com/track/chrt.fm/track/34D33/pscrb.fm/rss/p/traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661'), 'https://traffic.megaphone.fm/ITLLC7765286967.mp3?updated=1687282661') self.assertEqual(clean_podcast_url('https://pdst.fm/e/https://mgln.ai/e/441/www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3'), 'https://www.buzzsprout.com/1121972/13019085-ep-252-the-deep-life-stack.mp3') def test_LazyList(self): it = list(range(10)) self.assertEqual(list(LazyList(it)), it) self.assertEqual(LazyList(it).exhaust(), it) self.assertEqual(LazyList(it)[5], it[5]) self.assertEqual(LazyList(it)[5:], it[5:]) self.assertEqual(LazyList(it)[:5], it[:5]) self.assertEqual(LazyList(it)[::2], it[::2]) self.assertEqual(LazyList(it)[1::2], it[1::2]) self.assertEqual(LazyList(it)[5::-1], it[5::-1]) self.assertEqual(LazyList(it)[6:2:-2], it[6:2:-2]) self.assertEqual(LazyList(it)[::-1], 
it[::-1]) self.assertTrue(LazyList(it)) self.assertFalse(LazyList(range(0))) self.assertEqual(len(LazyList(it)), len(it)) self.assertEqual(repr(LazyList(it)), repr(it)) self.assertEqual(str(LazyList(it)), str(it)) self.assertEqual(list(LazyList(it, reverse=True)), it[::-1]) self.assertEqual(list(reversed(LazyList(it))[::-1]), it) self.assertEqual(list(reversed(LazyList(it))[1:3:7]), it[::-1][1:3:7]) def test_LazyList_laziness(self): def test(ll, idx, val, cache): self.assertEqual(ll[idx], val) self.assertEqual(ll._cache, list(cache)) ll = LazyList(range(10)) test(ll, 0, 0, range(1)) test(ll, 5, 5, range(6)) test(ll, -3, 7, range(10)) ll = LazyList(range(10), reverse=True) test(ll, -1, 0, range(1)) test(ll, 3, 6, range(10)) ll = LazyList(itertools.count()) test(ll, 10, 10, range(11)) ll = reversed(ll) test(ll, -15, 14, range(15)) def test_format_bytes(self): self.assertEqual(format_bytes(0), '0.00B') self.assertEqual(format_bytes(1000), '1000.00B') self.assertEqual(format_bytes(1024), '1.00KiB') self.assertEqual(format_bytes(1024**2), '1.00MiB') self.assertEqual(format_bytes(1024**3), '1.00GiB') self.assertEqual(format_bytes(1024**4), '1.00TiB') self.assertEqual(format_bytes(1024**5), '1.00PiB') self.assertEqual(format_bytes(1024**6), '1.00EiB') self.assertEqual(format_bytes(1024**7), '1.00ZiB') self.assertEqual(format_bytes(1024**8), '1.00YiB') self.assertEqual(format_bytes(1024**9), '1024.00YiB') def test_hide_login_info(self): self.assertEqual(Config.hide_login_info(['-u', 'foo', '-p', 'bar']), ['-u', 'PRIVATE', '-p', 'PRIVATE']) self.assertEqual(Config.hide_login_info(['-u']), ['-u']) self.assertEqual(Config.hide_login_info(['-u', 'foo', '-u', 'bar']), ['-u', 'PRIVATE', '-u', 'PRIVATE']) self.assertEqual(Config.hide_login_info(['--username=foo']), ['--username=PRIVATE']) def test_locked_file(self): TEXT = 'test_locked_file\n' FILE = 'test_locked_file.ytdl' MODES = 'war' # Order is important try: for lock_mode in MODES: with locked_file(FILE, lock_mode, False) as f: if lock_mode == 'r': self.assertEqual(f.read(), TEXT * 2, 'Wrong file content') else: f.write(TEXT) for test_mode in MODES: testing_write = test_mode != 'r' try: with locked_file(FILE, test_mode, False): pass except (BlockingIOError, PermissionError): if not testing_write: # FIXME: blocked read access print(f'Known issue: Exclusive lock ({lock_mode}) blocks read access ({test_mode})') continue self.assertTrue(testing_write, f'{test_mode} is blocked by {lock_mode}') else: self.assertFalse(testing_write, f'{test_mode} is not blocked by {lock_mode}') finally: with contextlib.suppress(OSError): os.remove(FILE) def test_determine_file_encoding(self): self.assertEqual(determine_file_encoding(b''), (None, 0)) self.assertEqual(determine_file_encoding(b'--verbose -x --audio-format mkv\n'), (None, 0)) self.assertEqual(determine_file_encoding(b'\xef\xbb\xbf'), ('utf-8', 3)) self.assertEqual(determine_file_encoding(b'\x00\x00\xfe\xff'), ('utf-32-be', 4)) self.assertEqual(determine_file_encoding(b'\xff\xfe'), ('utf-16-le', 2)) self.assertEqual(determine_file_encoding(b'\xff\xfe# coding: utf-8\n--verbose'), ('utf-16-le', 2)) self.assertEqual(determine_file_encoding(b'# coding: utf-8\n--verbose'), ('utf-8', 0)) self.assertEqual(determine_file_encoding(b'# coding: someencodinghere-12345\n--verbose'), ('someencodinghere-12345', 0)) self.assertEqual(determine_file_encoding(b'#coding:utf-8\n--verbose'), ('utf-8', 0)) self.assertEqual(determine_file_encoding(b'# coding: utf-8 \r\n--verbose'), ('utf-8', 0)) 
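        # Added illustrative sketch (not from the original suite): as the
        # b'\xff\xfe# coding: utf-8' case above shows, a BOM takes precedence
        # over an in-band coding declaration, so a UTF-8 BOM followed by a
        # conflicting declaration should still report utf-8.
        self.assertEqual(
            determine_file_encoding(b'\xef\xbb\xbf# coding: latin-1\n--verbose'),
            ('utf-8', 3))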
self.assertEqual(determine_file_encoding('# coding: utf-32-be'.encode('utf-32-be')), ('utf-32-be', 0)) self.assertEqual(determine_file_encoding('# coding: utf-16-le'.encode('utf-16-le')), ('utf-16-le', 0)) def test_get_compatible_ext(self): self.assertEqual(get_compatible_ext( vcodecs=[None], acodecs=[None, None], vexts=['mp4'], aexts=['m4a', 'm4a']), 'mkv') self.assertEqual(get_compatible_ext( vcodecs=[None], acodecs=[None], vexts=['flv'], aexts=['flv']), 'flv') self.assertEqual(get_compatible_ext( vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['m4a']), 'mp4') self.assertEqual(get_compatible_ext( vcodecs=[None], acodecs=[None], vexts=['mp4'], aexts=['webm']), 'mkv') self.assertEqual(get_compatible_ext( vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['m4a']), 'mkv') self.assertEqual(get_compatible_ext( vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['webm']), 'webm') self.assertEqual(get_compatible_ext( vcodecs=[None], acodecs=[None], vexts=['webm'], aexts=['weba']), 'webm') self.assertEqual(get_compatible_ext( vcodecs=['h264'], acodecs=['mp4a'], vexts=['mov'], aexts=['m4a']), 'mp4') self.assertEqual(get_compatible_ext( vcodecs=['av01.0.12M.08'], acodecs=['opus'], vexts=['mp4'], aexts=['webm']), 'webm') self.assertEqual(get_compatible_ext( vcodecs=['vp9'], acodecs=['opus'], vexts=['webm'], aexts=['webm'], preferences=['flv', 'mp4']), 'mp4') self.assertEqual(get_compatible_ext( vcodecs=['av1'], acodecs=['mp4a'], vexts=['webm'], aexts=['m4a'], preferences=('webm', 'mkv')), 'mkv') def test_try_call(self): def total(*x, **kwargs): return sum(x) + sum(kwargs.values()) self.assertEqual(try_call(None), None, msg='not a fn should give None') self.assertEqual(try_call(lambda: 1), 1, msg='int fn with no expected_type should give int') self.assertEqual(try_call(lambda: 1, expected_type=int), 1, msg='int fn with expected_type int should give int') self.assertEqual(try_call(lambda: 1, expected_type=dict), None, msg='int fn with wrong expected_type should give None') self.assertEqual(try_call(total, args=(0, 1, 0), expected_type=int), 1, msg='fn should accept arglist') self.assertEqual(try_call(total, kwargs={'a': 0, 'b': 1, 'c': 0}, expected_type=int), 1, msg='fn should accept kwargs') self.assertEqual(try_call(lambda: 1, expected_type=dict), None, msg='int fn with no expected_type should give None') self.assertEqual(try_call(lambda x: {}, total, args=(42, ), expected_type=int), 42, msg='expect first int result with expected_type int') def test_variadic(self): self.assertEqual(variadic(None), (None, )) self.assertEqual(variadic('spam'), ('spam', )) self.assertEqual(variadic('spam', allowed_types=dict), 'spam') with warnings.catch_warnings(): warnings.simplefilter('ignore') self.assertEqual(variadic('spam', allowed_types=[dict]), 'spam') def test_http_header_dict(self): headers = HTTPHeaderDict() headers['ytdl-test'] = b'0' self.assertEqual(list(headers.items()), [('Ytdl-Test', '0')]) headers['ytdl-test'] = 1 self.assertEqual(list(headers.items()), [('Ytdl-Test', '1')]) headers['Ytdl-test'] = '2' self.assertEqual(list(headers.items()), [('Ytdl-Test', '2')]) self.assertTrue('ytDl-Test' in headers) self.assertEqual(str(headers), str(dict(headers))) self.assertEqual(repr(headers), str(dict(headers))) headers.update({'X-dlp': 'data'}) self.assertEqual(set(headers.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data')}) self.assertEqual(dict(headers), {'Ytdl-Test': '2', 'X-Dlp': 'data'}) self.assertEqual(len(headers), 2) self.assertEqual(headers.copy(), headers) headers2 = 
HTTPHeaderDict({'X-dlp': 'data3'}, **headers, **{'X-dlp': 'data2'})
        self.assertEqual(set(headers2.items()), {('Ytdl-Test', '2'), ('X-Dlp', 'data2')})
        self.assertEqual(len(headers2), 2)
        headers2.clear()
        self.assertEqual(len(headers2), 0)

        # ensure we prefer latter headers
        headers3 = HTTPHeaderDict({'Ytdl-TeSt': 1}, {'Ytdl-test': 2})
        self.assertEqual(set(headers3.items()), {('Ytdl-Test', '2')})
        del headers3['ytdl-tesT']
        self.assertEqual(dict(headers3), {})

        headers4 = HTTPHeaderDict({'ytdl-test': 'data;'})
        self.assertEqual(set(headers4.items()), {('Ytdl-Test', 'data;')})

        # common mistake: strip whitespace from values
        # https://github.com/yt-dlp/yt-dlp/issues/8729
        headers5 = HTTPHeaderDict({'ytdl-test': ' data; '})
        self.assertEqual(set(headers5.items()), {('Ytdl-Test', 'data;')})

    def test_extract_basic_auth(self):
        assert extract_basic_auth('http://:foo.bar') == ('http://:foo.bar', None)
        assert extract_basic_auth('http://foo.bar') == ('http://foo.bar', None)
        assert extract_basic_auth('http://@foo.bar') == ('http://foo.bar', 'Basic Og==')
        assert extract_basic_auth('http://:pass@foo.bar') == ('http://foo.bar', 'Basic OnBhc3M=')
        assert extract_basic_auth('http://user:@foo.bar') == ('http://foo.bar', 'Basic dXNlcjo=')
        assert extract_basic_auth('http://user:pass@foo.bar') == ('http://foo.bar', 'Basic dXNlcjpwYXNz')

    @unittest.skipUnless(compat_os_name == 'nt', 'Only relevant on Windows')
    def test_windows_escaping(self):
        tests = [
            'test"&',
            '%CMDCMDLINE:~-1%&',
            'a\nb',
            '"',
            '\\',
            '!',
            '^!',
            'a \\ b',
            'a \\" b',
            'a \\ b\\',
            # We replace \r with \n
            ('a\r\ra', 'a\n\na'),
        ]

        def run_shell(args):
            stdout, stderr, error = Popen.run(
                args, text=True, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            assert not stderr
            assert not error
            return stdout

        for argument in tests:
            if isinstance(argument, str):
                expected = argument
            else:
                argument, expected = argument

            args = [sys.executable, '-c', 'import sys; print(end=sys.argv[1])', argument, 'end']
            assert run_shell(args) == expected
            assert run_shell(shell_quote(args, shell=True)) == expected


if __name__ == '__main__':
    unittest.main()

yt-dlp-2024.09.27/test/test_verbose_output.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import unittest

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import subprocess

rootDir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


class TestVerboseOutput(unittest.TestCase):
    def test_private_info_arg(self):
        outp = subprocess.Popen(
            [
                sys.executable, 'yt_dlp/__main__.py', '-v', '--ignore-config',
                '--username', 'johnsmith@gmail.com',
                '--password', 'my_secret_password',
            ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sout, serr = outp.communicate()
        self.assertTrue(b'--username' in serr)
        self.assertTrue(b'johnsmith' not in serr)
        self.assertTrue(b'--password' in serr)
        self.assertTrue(b'my_secret_password' not in serr)

    def test_private_info_shortarg(self):
        outp = subprocess.Popen(
            [
                sys.executable, 'yt_dlp/__main__.py', '-v', '--ignore-config',
                '-u', 'johnsmith@gmail.com',
                '-p', 'my_secret_password',
            ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sout, serr = outp.communicate()
        self.assertTrue(b'-u' in serr)
        self.assertTrue(b'johnsmith' not in serr)
        self.assertTrue(b'-p' in serr)
        self.assertTrue(b'my_secret_password' not in serr)

    def test_private_info_eq(self):
        outp = subprocess.Popen(
            [
                sys.executable, 'yt_dlp/__main__.py', '-v', '--ignore-config',
                '--username=johnsmith@gmail.com',
                '--password=my_secret_password',
            ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sout, serr = outp.communicate()
        self.assertTrue(b'--username' in serr)
        self.assertTrue(b'johnsmith' not in serr)
        self.assertTrue(b'--password' in serr)
        self.assertTrue(b'my_secret_password' not in serr)

    def test_private_info_shortarg_eq(self):
        outp = subprocess.Popen(
            [
                sys.executable, 'yt_dlp/__main__.py', '-v', '--ignore-config',
                '-u=johnsmith@gmail.com',
                '-p=my_secret_password',
            ], cwd=rootDir, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        sout, serr = outp.communicate()
        self.assertTrue(b'-u' in serr)
        self.assertTrue(b'johnsmith' not in serr)
        self.assertTrue(b'-p' in serr)
        self.assertTrue(b'my_secret_password' not in serr)


if __name__ == '__main__':
    unittest.main()

yt-dlp-2024.09.27/test/test_websockets.py

#!/usr/bin/env python3

# Allow direct execution
import os
import sys
import time

import pytest

from test.helper import verify_address_availability
from yt_dlp.networking.common import Features, DEFAULT_TIMEOUT

sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import http.client
import http.cookiejar
import http.server
import json
import random
import ssl
import threading

from yt_dlp import socks, traverse_obj
from yt_dlp.cookies import YoutubeDLCookieJar
from yt_dlp.dependencies import websockets
from yt_dlp.networking import Request
from yt_dlp.networking.exceptions import (
    CertificateVerifyError,
    HTTPError,
    ProxyError,
    RequestError,
    SSLError,
    TransportError,
)
from yt_dlp.utils.networking import HTTPHeaderDict

TEST_DIR = os.path.dirname(os.path.abspath(__file__))


def websocket_handler(websocket):
    for message in websocket:
        if isinstance(message, bytes):
            if message == b'bytes':
                return websocket.send('2')
        elif isinstance(message, str):
            if message == 'headers':
                return websocket.send(json.dumps(dict(websocket.request.headers)))
            elif message == 'path':
                return websocket.send(websocket.request.path)
            elif message == 'source_address':
                return websocket.send(websocket.remote_address[0])
            elif message == 'str':
                return websocket.send('1')
        return websocket.send(message)


def process_request(self, request):
    if request.path.startswith('/gen_'):
        status = http.HTTPStatus(int(request.path[5:]))
        if
300 <= status.value <= 300: return websockets.http11.Response( status.value, status.phrase, websockets.datastructures.Headers([('Location', '/')]), b'') return self.protocol.reject(status.value, status.phrase) elif request.path.startswith('/get_cookie'): response = self.protocol.accept(request) response.headers['Set-Cookie'] = 'test=ytdlp' return response return self.protocol.accept(request) def create_websocket_server(**ws_kwargs): import websockets.sync.server wsd = websockets.sync.server.serve( websocket_handler, '127.0.0.1', 0, process_request=process_request, open_timeout=2, **ws_kwargs) ws_port = wsd.socket.getsockname()[1] ws_server_thread = threading.Thread(target=wsd.serve_forever) ws_server_thread.daemon = True ws_server_thread.start() return ws_server_thread, ws_port def create_ws_websocket_server(): return create_websocket_server() def create_wss_websocket_server(): certfn = os.path.join(TEST_DIR, 'testcert.pem') sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) sslctx.load_cert_chain(certfn, None) return create_websocket_server(ssl=sslctx) MTLS_CERT_DIR = os.path.join(TEST_DIR, 'testdata', 'certificate') def create_mtls_wss_websocket_server(): certfn = os.path.join(TEST_DIR, 'testcert.pem') cacertfn = os.path.join(MTLS_CERT_DIR, 'ca.crt') sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) sslctx.verify_mode = ssl.CERT_REQUIRED sslctx.load_verify_locations(cafile=cacertfn) sslctx.load_cert_chain(certfn, None) return create_websocket_server(ssl=sslctx) def create_legacy_wss_websocket_server(): certfn = os.path.join(TEST_DIR, 'testcert.pem') sslctx = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) sslctx.maximum_version = ssl.TLSVersion.TLSv1_2 sslctx.set_ciphers('SHA1:AESCCM:aDSS:eNULL:aNULL') sslctx.load_cert_chain(certfn, None) return create_websocket_server(ssl=sslctx) def ws_validate_and_send(rh, req): rh.validate(req) max_tries = 3 for i in range(max_tries): try: return rh.send(req) except TransportError as e: if i < (max_tries - 1) and 'connection closed during handshake' in str(e): # websockets server sometimes hangs on new connections continue raise @pytest.mark.skipif(not websockets, reason='websockets must be installed to test websocket request handlers') @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) class TestWebsSocketRequestHandlerConformance: @classmethod def setup_class(cls): cls.ws_thread, cls.ws_port = create_ws_websocket_server() cls.ws_base_url = f'ws://127.0.0.1:{cls.ws_port}' cls.wss_thread, cls.wss_port = create_wss_websocket_server() cls.wss_base_url = f'wss://127.0.0.1:{cls.wss_port}' cls.bad_wss_thread, cls.bad_wss_port = create_websocket_server(ssl=ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER)) cls.bad_wss_host = f'wss://127.0.0.1:{cls.bad_wss_port}' cls.mtls_wss_thread, cls.mtls_wss_port = create_mtls_wss_websocket_server() cls.mtls_wss_base_url = f'wss://127.0.0.1:{cls.mtls_wss_port}' cls.legacy_wss_thread, cls.legacy_wss_port = create_legacy_wss_websocket_server() cls.legacy_wss_host = f'wss://127.0.0.1:{cls.legacy_wss_port}' def test_basic_websockets(self, handler): with handler() as rh: ws = ws_validate_and_send(rh, Request(self.ws_base_url)) assert 'upgrade' in ws.headers assert ws.status == 101 ws.send('foo') assert ws.recv() == 'foo' ws.close() # https://www.rfc-editor.org/rfc/rfc6455.html#section-5.6 @pytest.mark.parametrize('msg,opcode', [('str', 1), (b'bytes', 2)]) def test_send_types(self, handler, msg, opcode): with handler() as rh: ws = ws_validate_and_send(rh, Request(self.ws_base_url)) ws.send(msg) assert int(ws.recv()) == opcode 
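            # Added note: per RFC 6455 section 5.6 (cited above), the echoed
            # values are the frame opcodes, 0x1 for a text frame and 0x2 for
            # a binary frame, as reported by the server-side handler.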
ws.close() def test_verify_cert(self, handler): with handler() as rh: with pytest.raises(CertificateVerifyError): ws_validate_and_send(rh, Request(self.wss_base_url)) with handler(verify=False) as rh: ws = ws_validate_and_send(rh, Request(self.wss_base_url)) assert ws.status == 101 ws.close() def test_ssl_error(self, handler): with handler(verify=False) as rh: with pytest.raises(SSLError, match=r'ssl(?:v3|/tls) alert handshake failure') as exc_info: ws_validate_and_send(rh, Request(self.bad_wss_host)) assert not issubclass(exc_info.type, CertificateVerifyError) def test_legacy_ssl_extension(self, handler): with handler(verify=False) as rh: ws = ws_validate_and_send(rh, Request(self.legacy_wss_host, extensions={'legacy_ssl': True})) assert ws.status == 101 ws.close() # Ensure only applies to request extension with pytest.raises(SSLError): ws_validate_and_send(rh, Request(self.legacy_wss_host)) def test_legacy_ssl_support(self, handler): with handler(verify=False, legacy_ssl_support=True) as rh: ws = ws_validate_and_send(rh, Request(self.legacy_wss_host)) assert ws.status == 101 ws.close() @pytest.mark.parametrize('path,expected', [ # Unicode characters should be encoded with uppercase percent-encoding ('/中文', '/%E4%B8%AD%E6%96%87'), # don't normalize existing percent encodings ('/%c7%9f', '/%c7%9f'), ]) def test_percent_encode(self, handler, path, expected): with handler() as rh: ws = ws_validate_and_send(rh, Request(f'{self.ws_base_url}{path}')) ws.send('path') assert ws.recv() == expected assert ws.status == 101 ws.close() def test_remove_dot_segments(self, handler): with handler() as rh: # This isn't a comprehensive test, # but it should be enough to check whether the handler is removing dot segments ws = ws_validate_and_send(rh, Request(f'{self.ws_base_url}/a/b/./../../test')) assert ws.status == 101 ws.send('path') assert ws.recv() == '/test' ws.close() # We are restricted to known HTTP status codes in http.HTTPStatus # Redirects are not supported for websockets @pytest.mark.parametrize('status', (200, 204, 301, 302, 303, 400, 500, 511)) def test_raise_http_error(self, handler, status): with handler() as rh: with pytest.raises(HTTPError) as exc_info: ws_validate_and_send(rh, Request(f'{self.ws_base_url}/gen_{status}')) assert exc_info.value.status == status @pytest.mark.parametrize('params,extensions', [ ({'timeout': sys.float_info.min}, {}), ({}, {'timeout': sys.float_info.min}), ]) def test_read_timeout(self, handler, params, extensions): with handler(**params) as rh: with pytest.raises(TransportError): ws_validate_and_send(rh, Request(self.ws_base_url, extensions=extensions)) def test_connect_timeout(self, handler): # nothing should be listening on this port connect_timeout_url = 'ws://10.255.255.255' with handler(timeout=0.01) as rh, pytest.raises(TransportError): now = time.time() ws_validate_and_send(rh, Request(connect_timeout_url)) assert time.time() - now < DEFAULT_TIMEOUT # Per request timeout, should override handler timeout request = Request(connect_timeout_url, extensions={'timeout': 0.01}) with handler() as rh, pytest.raises(TransportError): now = time.time() ws_validate_and_send(rh, request) assert time.time() - now < DEFAULT_TIMEOUT def test_cookies(self, handler): cookiejar = YoutubeDLCookieJar() cookiejar.set_cookie(http.cookiejar.Cookie( version=0, name='test', value='ytdlp', port=None, port_specified=False, domain='127.0.0.1', domain_specified=True, domain_initial_dot=False, path='/', path_specified=True, secure=False, expires=None, discard=False, comment=None, 
comment_url=None, rest={})) with handler(cookiejar=cookiejar) as rh: ws = ws_validate_and_send(rh, Request(self.ws_base_url)) ws.send('headers') assert json.loads(ws.recv())['cookie'] == 'test=ytdlp' ws.close() with handler() as rh: ws = ws_validate_and_send(rh, Request(self.ws_base_url)) ws.send('headers') assert 'cookie' not in json.loads(ws.recv()) ws.close() ws = ws_validate_and_send(rh, Request(self.ws_base_url, extensions={'cookiejar': cookiejar})) ws.send('headers') assert json.loads(ws.recv())['cookie'] == 'test=ytdlp' ws.close() @pytest.mark.skip_handler('Websockets', 'Set-Cookie not supported by websockets') def test_cookie_sync_only_cookiejar(self, handler): # Ensure that cookies are ONLY being handled by the cookiejar with handler() as rh: ws_validate_and_send(rh, Request(f'{self.ws_base_url}/get_cookie', extensions={'cookiejar': YoutubeDLCookieJar()})) ws = ws_validate_and_send(rh, Request(self.ws_base_url, extensions={'cookiejar': YoutubeDLCookieJar()})) ws.send('headers') assert 'cookie' not in json.loads(ws.recv()) ws.close() @pytest.mark.skip_handler('Websockets', 'Set-Cookie not supported by websockets') def test_cookie_sync_delete_cookie(self, handler): # Ensure that cookies are ONLY being handled by the cookiejar cookiejar = YoutubeDLCookieJar() with handler(verbose=True, cookiejar=cookiejar) as rh: ws_validate_and_send(rh, Request(f'{self.ws_base_url}/get_cookie')) ws = ws_validate_and_send(rh, Request(self.ws_base_url)) ws.send('headers') assert json.loads(ws.recv())['cookie'] == 'test=ytdlp' ws.close() cookiejar.clear_session_cookies() ws = ws_validate_and_send(rh, Request(self.ws_base_url)) ws.send('headers') assert 'cookie' not in json.loads(ws.recv()) ws.close() def test_source_address(self, handler): source_address = f'127.0.0.{random.randint(5, 255)}' verify_address_availability(source_address) with handler(source_address=source_address) as rh: ws = ws_validate_and_send(rh, Request(self.ws_base_url)) ws.send('source_address') assert source_address == ws.recv() ws.close() def test_response_url(self, handler): with handler() as rh: url = f'{self.ws_base_url}/something' ws = ws_validate_and_send(rh, Request(url)) assert ws.url == url ws.close() def test_request_headers(self, handler): with handler(headers=HTTPHeaderDict({'test1': 'test', 'test2': 'test2'})) as rh: # Global Headers ws = ws_validate_and_send(rh, Request(self.ws_base_url)) ws.send('headers') headers = HTTPHeaderDict(json.loads(ws.recv())) assert headers['test1'] == 'test' ws.close() # Per request headers, merged with global ws = ws_validate_and_send(rh, Request( self.ws_base_url, headers={'test2': 'changed', 'test3': 'test3'})) ws.send('headers') headers = HTTPHeaderDict(json.loads(ws.recv())) assert headers['test1'] == 'test' assert headers['test2'] == 'changed' assert headers['test3'] == 'test3' ws.close() @pytest.mark.parametrize('client_cert', ( {'client_certificate': os.path.join(MTLS_CERT_DIR, 'clientwithkey.crt')}, { 'client_certificate': os.path.join(MTLS_CERT_DIR, 'client.crt'), 'client_certificate_key': os.path.join(MTLS_CERT_DIR, 'client.key'), }, { 'client_certificate': os.path.join(MTLS_CERT_DIR, 'clientwithencryptedkey.crt'), 'client_certificate_password': 'foobar', }, { 'client_certificate': os.path.join(MTLS_CERT_DIR, 'client.crt'), 'client_certificate_key': os.path.join(MTLS_CERT_DIR, 'clientencrypted.key'), 'client_certificate_password': 'foobar', }, )) def test_mtls(self, handler, client_cert): with handler( # Disable client-side validation of unacceptable self-signed testcert.pem # 
The test is of a check on the server side, so unaffected verify=False, client_cert=client_cert, ) as rh: ws_validate_and_send(rh, Request(self.mtls_wss_base_url)).close() def test_request_disable_proxy(self, handler): for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['ws']: # Given the handler is configured with a proxy with handler(proxies={'ws': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh: # When a proxy is explicitly set to None for the request ws = ws_validate_and_send(rh, Request(self.ws_base_url, proxies={'ws': None})) # Then no proxy should be used assert ws.status == 101 ws.close() @pytest.mark.skip_handlers_if( lambda _, handler: Features.NO_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support NO_PROXY') def test_noproxy(self, handler): for proxy_proto in handler._SUPPORTED_PROXY_SCHEMES or ['ws']: # Given the handler is configured with a proxy with handler(proxies={'ws': f'{proxy_proto}://10.255.255.255'}, timeout=5) as rh: for no_proxy in (f'127.0.0.1:{self.ws_port}', '127.0.0.1', 'localhost'): # When the request's no_proxy includes the request URL host ws = ws_validate_and_send(rh, Request(self.ws_base_url, proxies={'no': no_proxy})) # Then the proxy should not be used assert ws.status == 101 ws.close() @pytest.mark.skip_handlers_if( lambda _, handler: Features.ALL_PROXY not in handler._SUPPORTED_FEATURES, 'handler does not support ALL_PROXY') def test_allproxy(self, handler): supported_proto = traverse_obj(handler._SUPPORTED_PROXY_SCHEMES, 0, default='ws') # This is a bit of a hacky test, but it should be enough to check whether the handler is using the proxy. # 0.1s might not be enough of a timeout if proxy is not used in all cases, but should still get failures. with handler(proxies={'all': f'{supported_proto}://10.255.255.255'}, timeout=0.1) as rh: with pytest.raises(TransportError): ws_validate_and_send(rh, Request(self.ws_base_url)).close() with handler(timeout=0.1) as rh: with pytest.raises(TransportError): ws_validate_and_send( rh, Request(self.ws_base_url, proxies={'all': f'{supported_proto}://10.255.255.255'})).close() def create_fake_ws_connection(raised): import websockets.sync.client class FakeWsConnection(websockets.sync.client.ClientConnection): def __init__(self, *args, **kwargs): class FakeResponse: body = b'' headers = {} status_code = 101 reason_phrase = 'test' self.response = FakeResponse() def send(self, *args, **kwargs): raise raised() def recv(self, *args, **kwargs): raise raised() def close(self, *args, **kwargs): return return FakeWsConnection() @pytest.mark.parametrize('handler', ['Websockets'], indirect=True) class TestWebsocketsRequestHandler: @pytest.mark.parametrize('raised,expected', [ # https://websockets.readthedocs.io/en/stable/reference/exceptions.html (lambda: websockets.exceptions.InvalidURI(msg='test', uri='test://'), RequestError), # Requires a response object. Should be covered by HTTP error tests. 
# (lambda: websockets.exceptions.InvalidStatus(), TransportError), (lambda: websockets.exceptions.InvalidHandshake(), TransportError), # These are subclasses of InvalidHandshake (lambda: websockets.exceptions.InvalidHeader(name='test'), TransportError), (lambda: websockets.exceptions.NegotiationError(), TransportError), # Catch-all (lambda: websockets.exceptions.WebSocketException(), TransportError), (lambda: TimeoutError(), TransportError), # These may be raised by our create_connection implementation, which should also be caught (lambda: OSError(), TransportError), (lambda: ssl.SSLError(), SSLError), (lambda: ssl.SSLCertVerificationError(), CertificateVerifyError), (lambda: socks.ProxyError(), ProxyError), ]) def test_request_error_mapping(self, handler, monkeypatch, raised, expected): import websockets.sync.client import yt_dlp.networking._websockets with handler() as rh: def fake_connect(*args, **kwargs): raise raised() monkeypatch.setattr(yt_dlp.networking._websockets, 'create_connection', lambda *args, **kwargs: None) monkeypatch.setattr(websockets.sync.client, 'connect', fake_connect) with pytest.raises(expected) as exc_info: rh.send(Request('ws://fake-url')) assert exc_info.type is expected @pytest.mark.parametrize('raised,expected,match', [ # https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.send (lambda: websockets.exceptions.ConnectionClosed(None, None), TransportError, None), (lambda: RuntimeError(), TransportError, None), (lambda: TimeoutError(), TransportError, None), (lambda: TypeError(), RequestError, None), (lambda: socks.ProxyError(), ProxyError, None), # Catch-all (lambda: websockets.exceptions.WebSocketException(), TransportError, None), ]) def test_ws_send_error_mapping(self, handler, monkeypatch, raised, expected, match): from yt_dlp.networking._websockets import WebsocketsResponseAdapter ws = WebsocketsResponseAdapter(create_fake_ws_connection(raised), url='ws://fake-url') with pytest.raises(expected, match=match) as exc_info: ws.send('test') assert exc_info.type is expected @pytest.mark.parametrize('raised,expected,match', [ # https://websockets.readthedocs.io/en/stable/reference/sync/client.html#websockets.sync.client.ClientConnection.recv (lambda: websockets.exceptions.ConnectionClosed(None, None), TransportError, None), (lambda: RuntimeError(), TransportError, None), (lambda: TimeoutError(), TransportError, None), (lambda: socks.ProxyError(), ProxyError, None), # Catch-all (lambda: websockets.exceptions.WebSocketException(), TransportError, None), ]) def test_ws_recv_error_mapping(self, handler, monkeypatch, raised, expected, match): from yt_dlp.networking._websockets import WebsocketsResponseAdapter ws = WebsocketsResponseAdapter(create_fake_ws_connection(raised), url='ws://fake-url') with pytest.raises(expected, match=match) as exc_info: ws.recv() assert exc_info.type is expected 
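The server fixtures above encode a small request/response protocol: each recognized command ('headers', 'path', 'source_address', 'str', b'bytes') produces exactly one reply and then `websocket_handler` returns, so the server closes the connection after responding; any other string is echoed back verbatim. A minimal sketch of that contract, assuming a `websockets` release that ships the sync client (which this test file already imports); the `ask` helper is illustrative only and not part of the suite:

```python
import json

import websockets.sync.client


def ask(port, message):
    # One command per connection: websocket_handler returns after its first reply
    with websockets.sync.client.connect(f'ws://127.0.0.1:{port}') as conn:
        conn.send(message)
        return conn.recv()


_, port = create_ws_websocket_server()
assert ask(port, 'path') == '/'  # handler replies with the request path
assert 'host' in {name.lower() for name in json.loads(ask(port, 'headers'))}
assert ask(port, 'anything else') == 'anything else'  # unknown strings are echoed back
```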
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/test_write_annotations.py.disabled�������������������������������������������0000664�0000000�0000000�00000004660�14675634471�0023266�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import xml.etree.ElementTree import yt_dlp.extractor import yt_dlp.YoutubeDL from test.helper import get_params, is_download_test, try_rm class YoutubeDL(yt_dlp.YoutubeDL): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.to_stderr = self.to_screen params = get_params({ 'writeannotations': True, 'skip_download': True, 'writeinfojson': False, 'format': 'flv', }) TEST_ID = 'gr51aVj-mLg' ANNOTATIONS_FILE = TEST_ID + '.annotations.xml' EXPECTED_ANNOTATIONS = ['Speech bubble', 'Note', 'Title', 'Spotlight', 'Label'] @is_download_test class TestAnnotations(unittest.TestCase): def setUp(self): # Clear old files self.tearDown() def test_info_json(self): expected = list(EXPECTED_ANNOTATIONS) # Two annotations could have the same text. ie = yt_dlp.extractor.YoutubeIE() ydl = YoutubeDL(params) ydl.add_info_extractor(ie) ydl.download([TEST_ID]) self.assertTrue(os.path.exists(ANNOTATIONS_FILE)) annoxml = None with open(ANNOTATIONS_FILE, encoding='utf-8') as annof: annoxml = xml.etree.ElementTree.parse(annof) self.assertTrue(annoxml is not None, 'Failed to parse annotations XML') root = annoxml.getroot() self.assertEqual(root.tag, 'document') annotationsTag = root.find('annotations') self.assertEqual(annotationsTag.tag, 'annotations') annotations = annotationsTag.findall('annotation') # Not all the annotations have TEXT children and the annotations are returned unsorted. for a in annotations: self.assertEqual(a.tag, 'annotation') if a.get('type') == 'text': textTag = a.find('TEXT') text = textTag.text self.assertTrue(text in expected) # assertIn only added in python 2.7 # remove the first occurrence, there could be more than one annotation with the same text expected.remove(text) # We should have seen (and removed) all the expected annotation texts. 
self.assertEqual(len(expected), 0, 'Not all expected annotations were found.') def tearDown(self): try_rm(ANNOTATIONS_FILE) if __name__ == '__main__': unittest.main() ��������������������������������������������������������������������������������yt-dlp-2024.09.27/test/test_youtube_lists.py��������������������������������������������������������0000664�0000000�0000000�00000005264�14675634471�0020664�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from test.helper import FakeYDL, is_download_test from yt_dlp.extractor import YoutubeIE, YoutubeTabIE from yt_dlp.utils import ExtractorError @is_download_test class TestYoutubeLists(unittest.TestCase): def assertIsPlaylist(self, info): """Make sure the info has '_type' set to 'playlist'""" self.assertEqual(info['_type'], 'playlist') def test_youtube_playlist_noplaylist(self): dl = FakeYDL() dl.params['noplaylist'] = True ie = YoutubeTabIE(dl) result = ie.extract('https://www.youtube.com/watch?v=OmJ-4B-mS-Y&list=PLydZ2Hrp_gPRJViZjLFKaBMgCQOYEEkyp&index=2') self.assertEqual(result['_type'], 'url') self.assertEqual(result['ie_key'], YoutubeIE.ie_key()) self.assertEqual(YoutubeIE.extract_id(result['url']), 'OmJ-4B-mS-Y') def test_youtube_mix(self): dl = FakeYDL() ie = YoutubeTabIE(dl) result = ie.extract('https://www.youtube.com/watch?v=tyITL_exICo&list=RDCLAK5uy_kLWIr9gv1XLlPbaDS965-Db4TrBoUTxQ8') entries = list(result['entries']) self.assertTrue(len(entries) >= 50) original_video = entries[0] self.assertEqual(original_video['id'], 'tyITL_exICo') def test_youtube_flat_playlist_extraction(self): dl = FakeYDL() dl.params['extract_flat'] = True ie = YoutubeTabIE(dl) result = ie.extract('https://www.youtube.com/playlist?list=PL4lCao7KL_QFVb7Iudeipvc2BCavECqzc') self.assertIsPlaylist(result) entries = list(result['entries']) self.assertTrue(len(entries) == 1) video = entries[0] self.assertEqual(video['_type'], 'url') self.assertEqual(video['ie_key'], 'Youtube') self.assertEqual(video['id'], 'BaW_jenozKc') self.assertEqual(video['url'], 'https://www.youtube.com/watch?v=BaW_jenozKc') self.assertEqual(video['title'], 'youtube-dl test video "\'/\\ä↭𝕐') self.assertEqual(video['duration'], 10) self.assertEqual(video['uploader'], 'Philipp Hagemeister') def test_youtube_channel_no_uploads(self): dl = FakeYDL() dl.params['extract_flat'] = True ie = YoutubeTabIE(dl) # no uploads with self.assertRaisesRegex(ExtractorError, r'no uploads'): ie.extract('https://www.youtube.com/channel/UC2yXPzFejc422buOIzn_0CA') # no uploads and no UCID given with self.assertRaisesRegex(ExtractorError, r'no uploads'): ie.extract('https://www.youtube.com/news') if __name__ == '__main__': unittest.main() 
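For reference, the assertions in test_youtube_flat_playlist_extraction above pin down the shape of a flat-extracted entry. A sketch of that data structure, with every value taken directly from the test (illustrative, not an API guarantee):

```python
flat_entry = {
    '_type': 'url',       # flat entries are unresolved URL results
    'ie_key': 'Youtube',  # extractor to use when the entry is resolved later
    'id': 'BaW_jenozKc',
    'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
    'title': 'youtube-dl test video "\'/\\ä↭𝕐',
    'duration': 10,
    'uploader': 'Philipp Hagemeister',
}
```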
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/test_youtube_misc.py���������������������������������������������������������0000664�0000000�0000000�00000001720�14675634471�0020452�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) from yt_dlp.extractor import YoutubeIE class TestYoutubeMisc(unittest.TestCase): def test_youtube_extract(self): assertExtractId = lambda url, video_id: self.assertEqual(YoutubeIE.extract_id(url), video_id) assertExtractId('http://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('https://www.youtube.com/watch?&v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('https://www.youtube.com/watch?feature=player_embedded&v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('https://www.youtube.com/watch_popup?v=BaW_jenozKc', 'BaW_jenozKc') assertExtractId('http://www.youtube.com/watch?v=BaW_jenozKcsharePLED17F32AD9753930', 'BaW_jenozKc') assertExtractId('BaW_jenozKc', 'BaW_jenozKc') if __name__ == '__main__': unittest.main() ������������������������������������������������yt-dlp-2024.09.27/test/test_youtube_signature.py����������������������������������������������������0000664�0000000�0000000�00000024373�14675634471�0021531�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env python3 # Allow direct execution import os import sys import unittest sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import contextlib import re import string import urllib.request from test.helper import FakeYDL, is_download_test from yt_dlp.extractor import YoutubeIE from yt_dlp.jsinterp import JSInterpreter _SIG_TESTS = [ ( 'https://s.ytimg.com/yts/jsbin/html5player-vflHOr_nV.js', 86, '>=<;:/.-[+*)(\'&%$#"!ZYX0VUTSRQPONMLKJIHGFEDCBA\\yxwvutsrqponmlkjihgfedcba987654321', ), ( 'https://s.ytimg.com/yts/jsbin/html5player-vfldJ8xgI.js', 85, '3456789a0cdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRS[UVWXYZ!"#$%&\'()*+,-./:;<=>?@', ), ( 'https://s.ytimg.com/yts/jsbin/html5player-vfle-mVwz.js', 90, ']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876', ), ( 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js', 84, 'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=', ), ( 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', '2ACFC7A61CA478CD21425E5A57EBD73DDC78E22A.2094302436B2D377D14A3BBA23022D023B8BC25AA', 'A52CB8B320D22032ABB3A41D773D2B6342034902.A22E87CDD37DBE75A5E52412DC874AC16A7CFCA2', ), ( 
'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflBb0OQx.js', 84, '123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQ0STUVWXYZ!"#$%&\'()*+,@./:;<=>', ), ( 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl9FYC6l.js', 83, '123456789abcdefghijklmnopqr0tuvwxyzABCDETGHIJKLMNOPQRS>UVWXYZ!"#$%&\'()*+,-./:;<=F', ), ( 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflCGk6yw/html5player.js', '4646B5181C6C3020DF1D9C7FCFEA.AD80ABF70C39BD369CCCAE780AFBB98FA6B6CB42766249D9488C288', '82C8849D94266724DC6B6AF89BBFA087EACCD963.B93C07FBA084ACAEFCF7C9D1FD0203C6C1815B6B', ), ( 'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', '312AA52209E3623129A412D56A40F11CB0AF14AE.3EE09501CB14E3BCDC3B2AE808BF3F1D14E7FBF12', '112AA5220913623229A412D56A40F11CB0AF14AE.3EE0950FCB14EEBCDC3B2AE808BF331D14E7FBF3', ), ( 'https://www.youtube.com/s/player/6ed0d907/player_ias.vflset/en_US/base.js', '2aq0aqSyOoJXtK73m-uME_jv7-pT15gOFC02RFkGMqWpzEICs69VdbwQ0LDp1v7j8xx92efCJlYFYb1sUkkBSPOlPmXgIARw8JQ0qOAOAA', 'AOq0QJ8wRAIgXmPlOPSBkkUs1bYFYlJCfe29xx8j7v1pDL2QwbdV96sCIEzpWqMGkFR20CFOg51Tp-7vj_EMu-m37KtXJoOySqa0', ), ] _NSIG_TESTS = [ ( 'https://www.youtube.com/s/player/7862ca1f/player_ias.vflset/en_US/base.js', 'X_LCxVDjAavgE5t', 'yxJ1dM6iz5ogUg', ), ( 'https://www.youtube.com/s/player/9216d1f7/player_ias.vflset/en_US/base.js', 'SLp9F5bwjAdhE9F-', 'gWnb9IK2DJ8Q1w', ), ( 'https://www.youtube.com/s/player/f8cb7a3b/player_ias.vflset/en_US/base.js', 'oBo2h5euWy6osrUt', 'ivXHpm7qJjJN', ), ( 'https://www.youtube.com/s/player/2dfe380c/player_ias.vflset/en_US/base.js', 'oBo2h5euWy6osrUt', '3DIBbn3qdQ', ), ( 'https://www.youtube.com/s/player/f1ca6900/player_ias.vflset/en_US/base.js', 'cu3wyu6LQn2hse', 'jvxetvmlI9AN9Q', ), ( 'https://www.youtube.com/s/player/8040e515/player_ias.vflset/en_US/base.js', 'wvOFaY-yjgDuIEg5', 'HkfBFDHmgw4rsw', ), ( 'https://www.youtube.com/s/player/e06dea74/player_ias.vflset/en_US/base.js', 'AiuodmaDDYw8d3y4bf', 'ankd8eza2T6Qmw', ), ( 'https://www.youtube.com/s/player/5dd88d1d/player-plasma-ias-phone-en_US.vflset/base.js', 'kSxKFLeqzv_ZyHSAt', 'n8gS8oRlHOxPFA', ), ( 'https://www.youtube.com/s/player/324f67b9/player_ias.vflset/en_US/base.js', 'xdftNy7dh9QGnhW', '22qLGxrmX8F1rA', ), ( 'https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', 'TDCstCG66tEAO5pR9o', 'dbxNtZ14c-yWyw', ), ( 'https://www.youtube.com/s/player/c81bbb4a/player_ias.vflset/en_US/base.js', 'gre3EcLurNY2vqp94', 'Z9DfGxWP115WTg', ), ( 'https://www.youtube.com/s/player/1f7d5369/player_ias.vflset/en_US/base.js', 'batNX7sYqIJdkJ', 'IhOkL_zxbkOZBw', ), ( 'https://www.youtube.com/s/player/009f1d77/player_ias.vflset/en_US/base.js', '5dwFHw8aFWQUQtffRq', 'audescmLUzI3jw', ), ( 'https://www.youtube.com/s/player/dc0c6770/player_ias.vflset/en_US/base.js', '5EHDMgYLV6HPGk_Mu-kk', 'n9lUJLHbxUI0GQ', ), ( 'https://www.youtube.com/s/player/113ca41c/player_ias.vflset/en_US/base.js', 'cgYl-tlYkhjT7A', 'hI7BBr2zUgcmMg', ), ( 'https://www.youtube.com/s/player/c57c113c/player_ias.vflset/en_US/base.js', 'M92UUMHa8PdvPd3wyM', '3hPqLJsiNZx7yA', ), ( 'https://www.youtube.com/s/player/5a3b6271/player_ias.vflset/en_US/base.js', 'B2j7f_UPT4rfje85Lu_e', 'm5DmNymaGQ5RdQ', ), ( 'https://www.youtube.com/s/player/7a062b77/player_ias.vflset/en_US/base.js', 'NRcE3y3mVtm_cV-W', 'VbsCYUATvqlt5w', ), ( 'https://www.youtube.com/s/player/dac945fd/player_ias.vflset/en_US/base.js', 'o8BkRxXhuYsBCWi6RplPdP', '3Lx32v_hmzTm6A', ), ( 'https://www.youtube.com/s/player/6f20102c/player_ias.vflset/en_US/base.js', 'lE8DhoDmKqnmJJ', 'pJTTX6XyJP2BYw', 
), ( 'https://www.youtube.com/s/player/cfa9e7cb/player_ias.vflset/en_US/base.js', 'aCi3iElgd2kq0bxVbQ', 'QX1y8jGb2IbZ0w', ), ( 'https://www.youtube.com/s/player/8c7583ff/player_ias.vflset/en_US/base.js', '1wWCVpRR96eAmMI87L', 'KSkWAVv1ZQxC3A', ), ( 'https://www.youtube.com/s/player/b7910ca8/player_ias.vflset/en_US/base.js', '_hXMCwMt9qE310D', 'LoZMgkkofRMCZQ', ), ( 'https://www.youtube.com/s/player/590f65a6/player_ias.vflset/en_US/base.js', '1tm7-g_A9zsI8_Lay_', 'xI4Vem4Put_rOg', ), ( 'https://www.youtube.com/s/player/b22ef6e7/player_ias.vflset/en_US/base.js', 'b6HcntHGkvBLk_FRf', 'kNPW6A7FyP2l8A', ), ( 'https://www.youtube.com/s/player/3400486c/player_ias.vflset/en_US/base.js', 'lL46g3XifCKUZn1Xfw', 'z767lhet6V2Skl', ), ( 'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js', '-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw', ), ( 'https://www.youtube.com/s/player/b12cc44b/player_ias.vflset/en_US/base.js', 'keLa5R2U00sR9SQK', 'N1OGyujjEwMnLw', ), ] @is_download_test class TestPlayerInfo(unittest.TestCase): def test_youtube_extract_player_info(self): PLAYER_URLS = ( ('https://www.youtube.com/s/player/4c3f79c5/player_ias.vflset/en_US/base.js', '4c3f79c5'), ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/en_US/base.js', '64dddad9'), ('https://www.youtube.com/s/player/64dddad9/player_ias.vflset/fr_FR/base.js', '64dddad9'), ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-en_US.vflset/base.js', '64dddad9'), ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-phone-de_DE.vflset/base.js', '64dddad9'), ('https://www.youtube.com/s/player/64dddad9/player-plasma-ias-tablet-en_US.vflset/base.js', '64dddad9'), # obsolete ('https://www.youtube.com/yts/jsbin/player_ias-vfle4-e03/en_US/base.js', 'vfle4-e03'), ('https://www.youtube.com/yts/jsbin/player_ias-vfl49f_g4/en_US/base.js', 'vfl49f_g4'), ('https://www.youtube.com/yts/jsbin/player_ias-vflCPQUIL/en_US/base.js', 'vflCPQUIL'), ('https://www.youtube.com/yts/jsbin/player-vflzQZbt7/en_US/base.js', 'vflzQZbt7'), ('https://www.youtube.com/yts/jsbin/player-en_US-vflaxXRn1/base.js', 'vflaxXRn1'), ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js', 'vflXGBaUN'), ('https://s.ytimg.com/yts/jsbin/html5player-en_US-vflKjOTVq/html5player.js', 'vflKjOTVq'), ) for player_url, expected_player_id in PLAYER_URLS: player_id = YoutubeIE._extract_player_info(player_url) self.assertEqual(player_id, expected_player_id) @is_download_test class TestSignature(unittest.TestCase): def setUp(self): TEST_DIR = os.path.dirname(os.path.abspath(__file__)) self.TESTDATA_DIR = os.path.join(TEST_DIR, 'testdata/sigs') if not os.path.exists(self.TESTDATA_DIR): os.mkdir(self.TESTDATA_DIR) def tearDown(self): with contextlib.suppress(OSError): for f in os.listdir(self.TESTDATA_DIR): os.remove(f) def t_factory(name, sig_func, url_pattern): def make_tfunc(url, sig_input, expected_sig): m = url_pattern.match(url) assert m, f'{url!r} should follow URL format' test_id = m.group('id') def test_func(self): basename = f'player-{name}-{test_id}.js' fn = os.path.join(self.TESTDATA_DIR, basename) if not os.path.exists(fn): urllib.request.urlretrieve(url, fn) with open(fn, encoding='utf-8') as testf: jscode = testf.read() self.assertEqual(sig_func(jscode, sig_input), expected_sig) test_func.__name__ = f'test_{name}_js_{test_id}' setattr(TestSignature, test_func.__name__, test_func) return make_tfunc def signature(jscode, sig_input): func = YoutubeIE(FakeYDL())._parse_sig_js(jscode) src_sig = ( str(string.printable[:sig_input]) if 
isinstance(sig_input, int) else sig_input) return func(src_sig) def n_sig(jscode, sig_input): funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode) return JSInterpreter(jscode).call_function(funcname, sig_input) make_sig_test = t_factory( 'signature', signature, re.compile(r'.*(?:-|/player/)(?P<id>[a-zA-Z0-9_-]+)(?:/.+\.js|(?:/watch_as3|/html5player)?\.[a-z]+)$')) for test_spec in _SIG_TESTS: make_sig_test(*test_spec) make_nsig_test = t_factory( 'nsig', n_sig, re.compile(r'.+/player/(?P<id>[a-zA-Z0-9_-]+)/.+.js$')) for test_spec in _NSIG_TESTS: make_nsig_test(*test_spec) if __name__ == '__main__': unittest.main() ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testcert.pem�����������������������������������������������������������������0000664�0000000�0000000�00000006150�14675634471�0016674�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������-----BEGIN PRIVATE KEY----- MIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDMF0bAzaHAdIyB HRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaUYF1uTcNp Qx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQqO6BVg4+h A1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8A4CK58Ev mMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRhKxUhmw0J aobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/Mo83KyiP tKMCSQulAgMBAAECggEALCfBDAexPjU5DNoh6bIorUXxIJzxTNzNHCdvgbCGiA54 BBKPh8s6qwazpnjT6WQWDIg/O5zZufqjE4wM9x4+0Zoqfib742ucJO9wY4way6x4 Clt0xzbLPabB+MoZ4H7ip+9n2+dImhe7pGdYyOHoNYeOL57BBi1YFW42Hj6u/8pd 63YCXisto3Rz1YvRQVjwsrS+cRKZlzAFQRviL30jav7Wh1aWEfcXxjj4zhm8pJdk ITGtq6howz57M0NtX6hZnfe8ywzTnDFIGKIMA2cYHuYJcBh9bc4tCGubTvTKK9UE 8fM+f6UbfGqfpKCq1mcgs0XMoFDSzKS9+mSJn0+5JQKBgQD+OCKaeH3Yzw5zGnlw XuQfMJGNcgNr+ImjmvzUAC2fAZUJLAcQueE5kzMv5Fmd+EFE2CEX1Vit3tg0SXvA G+bq609doILHMA03JHnV1npO/YNIhG3AAtJlKYGxQNfWH9mflYj9mEui8ZFxG52o zWhHYuifOjjZszUR+/eio6NPzwKBgQDNhUBTrT8LIX4SE/EFUiTlYmWIvOMgXYvN 8Cm3IRNQ/yyphZaXEU0eJzfX5uCDfSVOgd6YM/2pRah+t+1Hvey4H8e0GVTu5wMP gkkqwKPGIR1YOmlw6ippqwvoJD7LuYrm6Q4D6e1PvkjwCq6lEndrOPmPrrXNd0JJ XO60y3U2SwKBgQDLkyZarryQXxcCI6Q10Tc6pskYDMIit095PUbTeiUOXNT9GE28 Hi32ziLCakk9kCysNasii81MxtQ54tJ/f5iGbNMMddnkKl2a19Hc5LjjAm4cJzg/ 98KGEhvyVqvAo5bBDZ06/rcrD+lZOzUglQS5jcIcqCIYa0LHWQ/wJLxFzwKBgFcZ 1SRhdSmDfUmuF+S4ZpistflYjC3IV5rk4NkS9HvMWaJS0nqdw4A3AMzItXgkjq4S DkOVLTkTI5Do5HAWRv/VwC5M2hkR4NMu1VGAKSisGiKtRsirBWSZMEenLNHshbjN Jrpz5rZ4H7NT46ZkCCZyFBpX4gb9NyOedjA7Via3AoGARF8RxbYjnEGGFuhnbrJB FTPR0vaL4faY3lOgRZ8jOG9V2c9Hzi/y8a8TU4C11jnJSDqYCXBTd5XN28npYxtD pjRsCwy6ze+yvYXPO7C978eMG3YRyj366NXUxnXN59ibwe/lxi2OD9z8J1LEdF6z VJua1Wn8HKxnXMI61DhTCSo= -----END PRIVATE KEY----- -----BEGIN CERTIFICATE----- MIIEEzCCAvugAwIBAgIJAK1haYi6gmSKMA0GCSqGSIb3DQEBCwUAMIGeMQswCQYD VQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEbMBkG A1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRsIHRl c3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhhZ0Bw aGloYWcuZGUwIBcNMTUwMTMwMDExNTA4WhgPMjExNTAxMDYwMTE1MDhaMIGeMQsw CQYDVQQGEwJERTEMMAoGA1UECAwDTlJXMRQwEgYDVQQHDAtEdWVzc2VsZG9yZjEb 
MBkGA1UECgwSeW91dHViZS1kbCBwcm9qZWN0MRkwFwYDVQQLDBB5b3V0dWJlLWRs IHRlc3RzMRIwEAYDVQQDDAlsb2NhbGhvc3QxHzAdBgkqhkiG9w0BCQEWEHBoaWhh Z0BwaGloYWcuZGUwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDMF0bA zaHAdIyBHRmnIp4vv40lGqEePmWqicCl0QZ0wsb5dNysSxSa7330M2QeQopGfdaU YF1uTcNpQx6ECgBSfg+RrOBI7r/u4F+sKX8MUXVaf/5QoBUrGNGSn/pp7HMGOuQq O6BVg4+hA1ySSwUG8mZItLRry1ISyErmW8b9xlqfd97uLME/5tX+sMelRFjUbAx8 A4CK58EvmMguHVTlXzx5RMdYcf1VScYcjlV/qA45uzP8zwI5aigfcmUD+tbGuQRh KxUhmw0JaobtOR6+JSOAULW5gYa/egE4dWLwbyM6b6eFbdnjlQzEA1EW7ChMPAW/ Mo83KyiPtKMCSQulAgMBAAGjUDBOMB0GA1UdDgQWBBTBUZoqhQkzHQ6xNgZfFxOd ZEVt8TAfBgNVHSMEGDAWgBTBUZoqhQkzHQ6xNgZfFxOdZEVt8TAMBgNVHRMEBTAD AQH/MA0GCSqGSIb3DQEBCwUAA4IBAQCUOCl3T/J9B08Z+ijfOJAtkbUaEHuVZb4x 5EpZSy2ZbkLvtsftMFieHVNXn9dDswQc5qjYStCC4o60LKw4M6Y63FRsAZ/DNaqb PY3jyCyuugZ8/sNf50vHYkAcF7SQYqOQFQX4TQsNUk2xMJIt7H0ErQFmkf/u3dg6 cy89zkT462IwxzSG7NNhIlRkL9o5qg+Y1mF9eZA1B0rcL6hO24PPTHOd90HDChBu SZ6XMi/LzYQSTf0Vg2R+uMIVlzSlkdcZ6sqVnnqeLL8dFyIa4e9sj/D4ZCYP8Mqe Z73H5/NNhmwCHRqVUTgm307xblQaWGhwAiDkaRvRW2aJQ0qGEdZK -----END CERTIFICATE----- ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/��������������������������������������������������������������������0000775�0000000�0000000�00000000000�14675634471�0016143�5����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/certificate/��������������������������������������������������������0000775�0000000�0000000�00000000000�14675634471�0020425�5����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/certificate/ca.crt��������������������������������������������������0000664�0000000�0000000�00000001076�14675634471�0021526�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������-----BEGIN CERTIFICATE----- MIIBfDCCASOgAwIBAgIUUgngoxFpuWft8gjj3uEFoqJyoJowCgYIKoZIzj0EAwIw FDESMBAGA1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEwMVoXDTM4MTAxNTAz MDEwMVowFDESMBAGA1UEAwwJeXRkbHB0ZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0D AQcDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCHYxFU KpcCfVt9aueRyUFi1TNkkkEZ9D6fbqNTMFEwHQYDVR0OBBYEFBdY2rVNLFGM6r1F iuamNDaiq0QoMB8GA1UdIwQYMBaAFBdY2rVNLFGM6r1FiuamNDaiq0QoMA8GA1Ud EwEB/wQFMAMBAf8wCgYIKoZIzj0EAwIDRwAwRAIgXJg2jio1kow2g/iP54Qq+iI2 m4EAvZiY0Im/Ni3PHawCIC6KCl6QcHANbeq8ckOXNGusjl6OWhvEM3uPBPhqskq1 -----END 
CERTIFICATE----- ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/certificate/ca.key��������������������������������������������������0000664�0000000�0000000�00000000343�14675634471�0021522�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������-----BEGIN EC PRIVATE KEY----- MHcCAQEEIG2L1bHdl3PnaLiJ7Zm8aAGCj4GiVbSbXQcrJAdL+yqOoAoGCCqGSM49 AwEHoUQDQgAEcTaKMtIn2/1kgid1zXFpLm87FMT5PP3/bltKVVH3DLO//0kUslCH YxFUKpcCfVt9aueRyUFi1TNkkkEZ9D6fbg== -----END EC PRIVATE KEY----- ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/certificate/ca.srl��������������������������������������������������0000664�0000000�0000000�00000000051�14675634471�0021526�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������4A260C33C4D34612646E6321E1E767DF1A95EF0B ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/certificate/client.crt����������������������������������������������0000664�0000000�0000000�00000000711�14675634471�0022414�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������-----BEGIN CERTIFICATE----- MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY D0dB8M1kJw== -----END CERTIFICATE----- 
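This client certificate is signed by the ca.crt/ca.key pair above; the subject and issuer common names come from the openssl commands recorded in instructions.md further below. A quick inspection sketch, assuming the third-party `cryptography` package (an assumption — it is not a yt-dlp test dependency):

```python
from cryptography import x509

with open('test/testdata/certificate/client.crt', 'rb') as f:
    cert = x509.load_pem_x509_certificate(f.read())

assert cert.issuer.rfc4514_string() == 'CN=ytdlptest'    # the fixture CA
assert cert.subject.rfc4514_string() == 'CN=ytdlptest2'  # the client identity
```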
�������������������������������������������������������yt-dlp-2024.09.27/test/testdata/certificate/client.csr����������������������������������������������0000664�0000000�0000000�00000000547�14675634471�0022422�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������-----BEGIN CERTIFICATE REQUEST----- MIHQMHcCAQAwFTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqG SM49AwEHA0IABKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq 3ZuZ7rubyuMSXNuH+2Cl9msSpJB2LhJs5kegADAKBggqhkjOPQQDAgNJADBGAiEA 1LZ72mtPmVxhGtdMvpZ0fyA68H2RC5IMHpLq18T55UcCIQDKpkXXVTvAzS0JioCq 6kiYq8Oxx6ZMoI+11k75/Kip1g== -----END CERTIFICATE REQUEST----- ���������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/certificate/client.key����������������������������������������������0000664�0000000�0000000�00000000343�14675634471�0022415�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������-----BEGIN EC PRIVATE KEY----- MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49 AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw== -----END EC PRIVATE KEY----- ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/certificate/clientencrypted.key�������������������������������������0000664�0000000�0000000�00000000472�14675634471�0024336�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������-----BEGIN EC PRIVATE KEY----- Proc-Type: 4,ENCRYPTED DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35 96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c= -----END EC PRIVATE KEY----- 
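The key above is protected with traditional OpenSSL PEM encryption (`DEK-Info: AES-256-CBC`) under the password `foobar` used by test_mtls and instructions.md. A decryption sketch, again assuming the third-party `cryptography` package:

```python
from cryptography.hazmat.primitives.serialization import load_pem_private_key

with open('test/testdata/certificate/clientencrypted.key', 'rb') as f:
    key = load_pem_private_key(f.read(), password=b'foobar')

# The fixtures were generated on prime256v1, which cryptography reports as secp256r1
assert key.curve.name == 'secp256r1'
```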
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/certificate/clientwithencryptedkey.crt������������������������������0000664�0000000�0000000�00000001403�14675634471�0025736�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������-----BEGIN CERTIFICATE----- MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY D0dB8M1kJw== -----END CERTIFICATE----- -----BEGIN EC PRIVATE KEY----- Proc-Type: 4,ENCRYPTED DEK-Info: AES-256-CBC,4B39160146F15544922E553E08299A35 96A7/iBkIfTVb8r2812ued2pS49FfVY4Ppz/45OGF0uFayMtMl8/GuEBCamuhFXS rnOOpco96TTeeKZHqR45wnf4tgHM8IjoQ6H0EX3lVF19OHnArAgrGYtohWUGSyGn IgLJFdUewIjdI7XApTJprQFE5E2tETXFA95mCz88u1c= -----END EC PRIVATE KEY----- �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/certificate/clientwithkey.crt���������������������������������������0000664�0000000�0000000�00000001254�14675634471�0024024�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������-----BEGIN CERTIFICATE----- MIIBIzCBygIUSiYMM8TTRhJkbmMh4edn3xqV7wswCgYIKoZIzj0EAwIwFDESMBAG A1UEAwwJeXRkbHB0ZXN0MB4XDTIyMDQxNTAzMDEyN1oXDTM4MTAxNTAzMDEyN1ow FTETMBEGA1UEAwwKeXRkbHB0ZXN0MjBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IA BKREKVDWfLKZknzYg+BUkmTn43f2pl/LNSyKPtXo/UV7hhp6JXIq3ZuZ7rubyuMS XNuH+2Cl9msSpJB2LhJs5kcwCgYIKoZIzj0EAwIDSAAwRQIhAMRr46vO25/5nUhD aHp4L67AeSvrjvSFHfubyD3Kr5dwAiA8EfOgVxc8Qh6ozTcbXO/WnBfS48ZFRSQY D0dB8M1kJw== -----END CERTIFICATE----- -----BEGIN EC PRIVATE KEY----- MHcCAQEEIAW6h9hwT0Aha+JBukgmHnrKRPoqPNWYA86ic0UaKHs8oAoGCCqGSM49 AwEHoUQDQgAEpEQpUNZ8spmSfNiD4FSSZOfjd/amX8s1LIo+1ej9RXuGGnolcird m5nuu5vK4xJc24f7YKX2axKkkHYuEmzmRw== -----END EC PRIVATE KEY----- 
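The two bundles above exist so test_mtls in test_websockets.py can cover every `client_certificate`/`client_certificate_key`/`client_certificate_password` combination. A sketch of how they load with the standard-library `ssl` module (paths relative to the repository root; illustrative only):

```python
import ssl

ctx = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)

# Certificate and unencrypted key concatenated into a single file
ctx.load_cert_chain('test/testdata/certificate/clientwithkey.crt')

# Separate certificate plus encrypted key, unlocked with the fixture password
ctx.load_cert_chain(
    'test/testdata/certificate/client.crt',
    keyfile='test/testdata/certificate/clientencrypted.key',
    password='foobar')
```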
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/certificate/instructions.md�����������������������������������������0000664�0000000�0000000�00000001327�14675634471�0023516�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������# Generate certificates for client cert tests ## CA ```sh openssl ecparam -name prime256v1 -genkey -noout -out ca.key openssl req -new -x509 -sha256 -days 6027 -key ca.key -out ca.crt -subj "/CN=ytdlptest" ``` ## Client ```sh openssl ecparam -name prime256v1 -genkey -noout -out client.key openssl ec -in client.key -out clientencrypted.key -passout pass:foobar -aes256 openssl req -new -sha256 -key client.key -out client.csr -subj "/CN=ytdlptest2" openssl x509 -req -in client.csr -CA ca.crt -CAkey ca.key -CAcreateserial -out client.crt -days 6027 -sha256 cp client.crt clientwithkey.crt cp client.crt clientwithencryptedkey.crt cat client.key >> clientwithkey.crt cat clientencrypted.key >> clientwithencryptedkey.crt ```���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/cookies/������������������������������������������������������������0000775�0000000�0000000�00000000000�14675634471�0017577�5����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/cookies/httponly_cookies.txt����������������������������������������0000664�0000000�0000000�00000000446�14675634471�0023741�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������# Netscape HTTP Cookie File # http://curl.haxx.se/rfc/cookie_spec.html # This is a generated file! Do not edit. 
#HttpOnly_www.foobar.foobar FALSE / TRUE 2147483647 HTTPONLY_COOKIE HTTPONLY_COOKIE_VALUE www.foobar.foobar FALSE / TRUE 2147483647 JS_ACCESSIBLE_COOKIE JS_ACCESSIBLE_COOKIE_VALUE ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/cookies/malformed_cookies.txt���������������������������������������0000664�0000000�0000000�00000000520�14675634471�0024017�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������# Netscape HTTP Cookie File # http://curl.haxx.se/rfc/cookie_spec.html # This is a generated file! Do not edit. # Cookie file entry with invalid number of fields - 6 instead of 7 www.foobar.foobar FALSE / FALSE 0 COOKIE # Cookie file entry with invalid expires at www.foobar.foobar FALSE / FALSE 1.7976931348623157e+308 COOKIE VALUE ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/cookies/session_cookies.txt�����������������������������������������0000664�0000000�0000000�00000000415�14675634471�0023537�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������# Netscape HTTP Cookie File # http://curl.haxx.se/rfc/cookie_spec.html # This is a generated file! Do not edit. 
www.foobar.foobar FALSE / TRUE YoutubeDLExpiresEmpty YoutubeDLExpiresEmptyValue www.foobar.foobar FALSE / TRUE 0 YoutubeDLExpires0 YoutubeDLExpires0Value ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/f4m/����������������������������������������������������������������0000775�0000000�0000000�00000000000�14675634471�0016631�5����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/f4m/custom_base_url.f4m���������������������������������������������0000664�0000000�0000000�00000001742�14675634471�0022433�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?xml version="1.0" encoding="UTF-8"?> <manifest xmlns="http://ns.adobe.com/f4m/1.0"> <streamType>recorded</streamType> <baseURL>http://vod.livestream.com/events/0000000000673980/</baseURL> <duration>269.293</duration> <bootstrapInfo profile="named" id="bootstrap_1">AAAAm2Fic3QAAAAAAAAAAQAAAAPoAAAAAAAEG+0AAAAAAAAAAAAAAAAAAQAAABlhc3J0AAAAAAAAAAABAAAAAQAAAC4BAAAAVmFmcnQAAAAAAAAD6AAAAAAEAAAAAQAAAAAAAAAAAAAXcAAAAC0AAAAAAAQHQAAAE5UAAAAuAAAAAAAEGtUAAAEYAAAAAAAAAAAAAAAAAAAAAAA=</bootstrapInfo> <media url="b90f532f-b0f6-4f4e-8289-706d490b2fd8_2292" bootstrapInfoId="bootstrap_1" bitrate="2148" width="1280" height="720" videoCodec="avc1.4d401f" audioCodec="mp4a.40.2"> <metadata>AgAKb25NZXRhRGF0YQgAAAAIAAhkdXJhdGlvbgBAcNSwIMSbpgAFd2lkdGgAQJQAAAAAAAAABmhlaWdodABAhoAAAAAAAAAJZnJhbWVyYXRlAEA4/7DoLwW3AA12aWRlb2RhdGFyYXRlAECe1DLgjcobAAx2aWRlb2NvZGVjaWQAQBwAAAAAAAAADWF1ZGlvZGF0YXJhdGUAQGSimlvaPKQADGF1ZGlvY29kZWNpZABAJAAAAAAAAAAACQ==</metadata> </media> </manifest> ������������������������������yt-dlp-2024.09.27/test/testdata/ism/����������������������������������������������������������������0000775�0000000�0000000�00000000000�14675634471�0016733�5����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/ism/ec-3_test.Manifest����������������������������������������������0000664�0000000�0000000�00000007623�14675634471�0022221�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?xml version="1.0" encoding="utf-8"?><!--Transformed by VSMT using XSL stylesheet for rule Identity--><!-- Created 
with Unified Streaming Platform (version=1.10.12-18737) --><SmoothStreamingMedia MajorVersion="2" MinorVersion="0" TimeScale="10000000" Duration="370000000"><StreamIndex Type="audio" QualityLevels="1" TimeScale="10000000" Language="deu" Name="audio_deu" Chunks="19" Url="QualityLevels({bitrate})/Fragments(audio_deu={start time})?noStreamProfile=1"><QualityLevel Index="0" Bitrate="127802" CodecPrivateData="1190" SamplingRate="48000" Channels="2" BitsPerSample="16" PacketSize="4" AudioTag="255" FourCC="AACL" /><c t="0" d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="20053333" /><c d="19840000" /><c d="20053333" /><c d="20053334" /><c d="7253333" /></StreamIndex><StreamIndex Type="audio" QualityLevels="1" TimeScale="10000000" Language="deu" Name="audio_deu_1" Chunks="19" Url="QualityLevels({bitrate})/Fragments(audio_deu_1={start time})?noStreamProfile=1"><QualityLevel Index="0" Bitrate="224000" CodecPrivateData="00063F000000AF87FBA7022DFB42A4D405CD93843BDD0700200F00" FourCCData="0700200F00" SamplingRate="48000" Channels="6" BitsPerSample="16" PacketSize="896" AudioTag="65534" FourCC="EC-3" /><c t="0" d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="20160000" /><c d="19840000" /><c d="8320000" /></StreamIndex><StreamIndex Type="video" QualityLevels="8" TimeScale="10000000" Language="deu" Name="video_deu" Chunks="19" Url="QualityLevels({bitrate})/Fragments(video_deu={start time})?noStreamProfile=1" MaxWidth="1920" MaxHeight="1080" DisplayWidth="1920" DisplayHeight="1080"><QualityLevel Index="0" Bitrate="23909" CodecPrivateData="000000016742C00CDB06077E5C05A808080A00000300020000030009C0C02EE0177CC6300F142AE00000000168CA8DC8" MaxWidth="384" MaxHeight="216" FourCC="AVC1" /><QualityLevel Index="1" Bitrate="403188" CodecPrivateData="00000001674D4014E98323B602D4040405000003000100000300320F1429380000000168EAECF2" MaxWidth="400" MaxHeight="224" FourCC="AVC1" /><QualityLevel Index="2" Bitrate="680365" CodecPrivateData="00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2" MaxWidth="640" MaxHeight="360" FourCC="AVC1" /><QualityLevel Index="3" Bitrate="1253465" CodecPrivateData="00000001674D401EE981405FF2E02D4040405000000300100000030320F162D3800000000168EAECF2" MaxWidth="640" MaxHeight="360" FourCC="AVC1" /><QualityLevel Index="4" Bitrate="2121558" CodecPrivateData="00000001674D401EECA0601BD80B50101014000003000400000300C83C58B6580000000168E93B3C80" MaxWidth="768" MaxHeight="432" FourCC="AVC1" /><QualityLevel Index="5" Bitrate="3275545" CodecPrivateData="00000001674D4020ECA02802DD80B501010140000003004000000C83C60C65800000000168E93B3C80" MaxWidth="1280" MaxHeight="720" FourCC="AVC1" /><QualityLevel Index="6" Bitrate="5300196" CodecPrivateData="00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80" MaxWidth="1920" MaxHeight="1080" FourCC="AVC1" /><QualityLevel Index="7" Bitrate="8079312" CodecPrivateData="00000001674D4028ECA03C0113F2E02D4040405000000300100000030320F18319600000000168E93B3C80" MaxWidth="1920" MaxHeight="1080" FourCC="AVC1" /><c t="0" d="20000000" /><c d="20000000" /><c d="20000000" /><c 
d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="20000000" /><c d="10000000" /></StreamIndex></SmoothStreamingMedia>�������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/ism/sintel.Manifest�������������������������������������������������0000664�0000000�0000000�00000055213�14675634471�0021727�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?xml version="1.0" encoding="utf-8"?> <!-- Created with Unified Streaming Platform (version=1.10.18-20255) --> <SmoothStreamingMedia MajorVersion="2" MinorVersion="0" TimeScale="10000000" Duration="8880746666"> <StreamIndex Type="audio" QualityLevels="1" TimeScale="10000000" Name="audio" Chunks="445" Url="QualityLevels({bitrate})/Fragments(audio={start time})"> <QualityLevel Index="0" Bitrate="128001" CodecPrivateData="1190" SamplingRate="48000" Channels="2" BitsPerSample="16" PacketSize="4" AudioTag="255" FourCC="AACL" /> <c t="0" d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c 
d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c 
d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="20053333" /> <c d="20053333" /> <c d="20053334" /> <c d="19840000" /> <c d="746666" /> </StreamIndex> <StreamIndex Type="text" QualityLevels="1" TimeScale="10000000" Language="eng" Subtype="CAPT" Name="textstream_eng" Chunks="11" Url="QualityLevels({bitrate})/Fragments(textstream_eng={start time})"> <QualityLevel Index="0" Bitrate="1000" CodecPrivateData="" FourCC="TTML" /> <c t="0" d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="600000000" /> <c d="240000000" /> </StreamIndex> <StreamIndex Type="video" QualityLevels="5" TimeScale="10000000" Name="video" Chunks="444" Url="QualityLevels({bitrate})/Fragments(video={start time})" MaxWidth="1688" MaxHeight="720" 
DisplayWidth="1689" DisplayHeight="720"> <QualityLevel Index="0" Bitrate="100000" CodecPrivateData="00000001674D401FDA0544EFFC2D002CBC40000003004000000C03C60CA80000000168EF32C8" MaxWidth="336" MaxHeight="144" FourCC="AVC1" /> <QualityLevel Index="1" Bitrate="326000" CodecPrivateData="00000001674D401FDA0241FE23FFC3BC83BA44000003000400000300C03C60CA800000000168EF32C8" MaxWidth="562" MaxHeight="240" FourCC="AVC1" /> <QualityLevel Index="2" Bitrate="698000" CodecPrivateData="00000001674D401FDA0350BFB97FF06AF06AD1000003000100000300300F1832A00000000168EF32C8" MaxWidth="844" MaxHeight="360" FourCC="AVC1" /> <QualityLevel Index="3" Bitrate="1493000" CodecPrivateData="00000001674D401FDA011C3DE6FFF0D890D871000003000100000300300F1832A00000000168EF32C8" MaxWidth="1126" MaxHeight="480" FourCC="AVC1" /> <QualityLevel Index="4" Bitrate="4482000" CodecPrivateData="00000001674D401FDA01A816F97FFC1ABC1AB440000003004000000C03C60CA80000000168EF32C8" MaxWidth="1688" MaxHeight="720" FourCC="AVC1" /> <c t="0" d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c 
d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c 
d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> <c d="20000000" /> </StreamIndex> </SmoothStreamingMedia> 
yt-dlp-2024.09.27/test/testdata/m3u8/
yt-dlp-2024.09.27/test/testdata/m3u8/bipbop_16x9.m3u8
#EXTM3U
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 1",AUTOSELECT=YES,DEFAULT=YES
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="bipbop_audio",LANGUAGE="eng",NAME="BipBop Audio 2",AUTOSELECT=NO,DEFAULT=NO,URI="alternate_audio_aac/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English",DEFAULT=YES,AUTOSELECT=YES,FORCED=NO,LANGUAGE="en",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/eng/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="English (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="en",URI="subtitles/eng_forced/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="fr",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/fra/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Français (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="fr",URI="subtitles/fra_forced/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="es",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/spa/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="Español (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="es",URI="subtitles/spa_forced/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語",DEFAULT=NO,AUTOSELECT=YES,FORCED=NO,LANGUAGE="ja",CHARACTERISTICS="public.accessibility.transcribes-spoken-dialog, public.accessibility.describes-music-and-sound",URI="subtitles/jpn/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="subs",NAME="日本語 (Forced)",DEFAULT=NO,AUTOSELECT=NO,FORCED=YES,LANGUAGE="ja",URI="subtitles/jpn_forced/prog_index.m3u8"
#EXT-X-STREAM-INF:BANDWIDTH=263851,CODECS="mp4a.40.2, avc1.4d400d",RESOLUTION=416x234,AUDIO="bipbop_audio",SUBTITLES="subs"
gear1/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=28451,CODECS="avc1.4d400d",URI="gear1/iframe_index.m3u8"
#EXT-X-STREAM-INF:BANDWIDTH=577610,CODECS="mp4a.40.2, avc1.4d401e",RESOLUTION=640x360,AUDIO="bipbop_audio",SUBTITLES="subs"
gear2/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=181534,CODECS="avc1.4d401e",URI="gear2/iframe_index.m3u8"
#EXT-X-STREAM-INF:BANDWIDTH=915905,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=960x540,AUDIO="bipbop_audio",SUBTITLES="subs"
gear3/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=297056,CODECS="avc1.4d401f",URI="gear3/iframe_index.m3u8"
#EXT-X-STREAM-INF:BANDWIDTH=1030138,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1280x720,AUDIO="bipbop_audio",SUBTITLES="subs"
gear4/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=339492,CODECS="avc1.4d401f",URI="gear4/iframe_index.m3u8"
#EXT-X-STREAM-INF:BANDWIDTH=1924009,CODECS="mp4a.40.2, avc1.4d401f",RESOLUTION=1920x1080,AUDIO="bipbop_audio",SUBTITLES="subs"
gear5/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:BANDWIDTH=669554,CODECS="avc1.4d401f",URI="gear5/iframe_index.m3u8"
#EXT-X-STREAM-INF:BANDWIDTH=41457,CODECS="mp4a.40.2",AUDIO="bipbop_audio",SUBTITLES="subs"
gear0/prog_index.m3u8
yt-dlp-2024.09.27/test/testdata/m3u8/img_bipbop_adv_example_fmp4.m3u8
#EXTM3U
#EXT-X-VERSION:6
#EXT-X-INDEPENDENT-SEGMENTS
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=2168183,BANDWIDTH=2177116,CODECS="avc1.640020,mp4a.40.2",RESOLUTION=960x540,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v5/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=7968416,BANDWIDTH=8001098,CODECS="avc1.64002a,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v9/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=6170000,BANDWIDTH=6312875,CODECS="avc1.64002a,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v8/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=4670769,BANDWIDTH=4943747,CODECS="avc1.64002a,mp4a.40.2",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v7/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=3168702,BANDWIDTH=3216424,CODECS="avc1.640020,mp4a.40.2",RESOLUTION=1280x720,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v6/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1265132,BANDWIDTH=1268994,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=768x432,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v4/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=895755,BANDWIDTH=902298,CODECS="avc1.64001e,mp4a.40.2",RESOLUTION=640x360,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v3/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=530721,BANDWIDTH=541052,CODECS="avc1.640015,mp4a.40.2",RESOLUTION=480x270,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud1",SUBTITLES="sub1"
v2/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=2390686,BANDWIDTH=2399619,CODECS="avc1.640020,ac-3",RESOLUTION=960x540,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v5/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=8190919,BANDWIDTH=8223601,CODECS="avc1.64002a,ac-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v9/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=6392503,BANDWIDTH=6535378,CODECS="avc1.64002a,ac-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v8/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=4893272,BANDWIDTH=5166250,CODECS="avc1.64002a,ac-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v7/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=3391205,BANDWIDTH=3438927,CODECS="avc1.640020,ac-3",RESOLUTION=1280x720,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v6/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1487635,BANDWIDTH=1491497,CODECS="avc1.64001e,ac-3",RESOLUTION=768x432,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v4/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1118258,BANDWIDTH=1124801,CODECS="avc1.64001e,ac-3",RESOLUTION=640x360,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v3/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=753224,BANDWIDTH=763555,CODECS="avc1.640015,ac-3",RESOLUTION=480x270,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud2",SUBTITLES="sub1"
v2/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=2198686,BANDWIDTH=2207619,CODECS="avc1.640020,ec-3",RESOLUTION=960x540,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v5/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=7998919,BANDWIDTH=8031601,CODECS="avc1.64002a,ec-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v9/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=6200503,BANDWIDTH=6343378,CODECS="avc1.64002a,ec-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v8/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=4701272,BANDWIDTH=4974250,CODECS="avc1.64002a,ec-3",RESOLUTION=1920x1080,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v7/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=3199205,BANDWIDTH=3246927,CODECS="avc1.640020,ec-3",RESOLUTION=1280x720,FRAME-RATE=60.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v6/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=1295635,BANDWIDTH=1299497,CODECS="avc1.64001e,ec-3",RESOLUTION=768x432,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v4/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=926258,BANDWIDTH=932801,CODECS="avc1.64001e,ec-3",RESOLUTION=640x360,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v3/prog_index.m3u8
#EXT-X-STREAM-INF:AVERAGE-BANDWIDTH=561224,BANDWIDTH=571555,CODECS="avc1.640015,ec-3",RESOLUTION=480x270,FRAME-RATE=30.000,CLOSED-CAPTIONS="cc1",AUDIO="aud3",SUBTITLES="sub1"
v2/prog_index.m3u8
#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=183689,BANDWIDTH=187492,CODECS="avc1.64002a",RESOLUTION=1920x1080,URI="v7/iframe_index.m3u8"
#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=132672,BANDWIDTH=136398,CODECS="avc1.640020",RESOLUTION=1280x720,URI="v6/iframe_index.m3u8"
#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=97767,BANDWIDTH=101378,CODECS="avc1.640020",RESOLUTION=960x540,URI="v5/iframe_index.m3u8"
#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=75722,BANDWIDTH=77818,CODECS="avc1.64001e",RESOLUTION=768x432,URI="v4/iframe_index.m3u8"
#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=63522,BANDWIDTH=65091,CODECS="avc1.64001e",RESOLUTION=640x360,URI="v3/iframe_index.m3u8"
#EXT-X-I-FRAME-STREAM-INF:AVERAGE-BANDWIDTH=39678,BANDWIDTH=40282,CODECS="avc1.640015",RESOLUTION=480x270,URI="v2/iframe_index.m3u8"
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud1",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,CHANNELS="2",URI="a1/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud2",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,CHANNELS="6",URI="a2/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=AUDIO,GROUP-ID="aud3",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,CHANNELS="6",URI="a3/prog_index.m3u8"
#EXT-X-MEDIA:TYPE=CLOSED-CAPTIONS,GROUP-ID="cc1",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,INSTREAM-ID="CC1"
#EXT-X-MEDIA:TYPE=SUBTITLES,GROUP-ID="sub1",LANGUAGE="en",NAME="English",AUTOSELECT=YES,DEFAULT=YES,FORCED=NO,URI="s1/en/prog_index.m3u8"
yt-dlp-2024.09.27/test/testdata/mpd/
yt-dlp-2024.09.27/test/testdata/mpd/float_duration.mpd
<?xml version="1.0" encoding="UTF-8"?> <MPD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:mpeg:dash:schema:mpd:2011" type="static" minBufferTime="PT2S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" mediaPresentationDuration="PT6014S"> <Period bitstreamSwitching="true"> <AdaptationSet mimeType="audio/mp4" codecs="mp4a.40.2" startWithSAP="1" segmentAlignment="true"> <SegmentTemplate timescale="1000000" presentationTimeOffset="0" initialization="ai_$RepresentationID$.mp4d" media="a_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate> <Representation id="318597" bandwidth="61587"></Representation> </AdaptationSet> <AdaptationSet mimeType="video/mp4" startWithSAP="1" segmentAlignment="true"> <SegmentTemplate timescale="1000000" presentationTimeOffset="0"
initialization="vi_$RepresentationID$.mp4d" media="v_$RepresentationID$_$Number$.mp4d" duration="2000000.0" startNumber="0"></SegmentTemplate> <Representation id="318597" codecs="avc1.42001f" width="340" height="192" bandwidth="318597"></Representation> <Representation id="638590" codecs="avc1.42001f" width="512" height="288" bandwidth="638590"></Representation> <Representation id="1022565" codecs="avc1.4d001f" width="688" height="384" bandwidth="1022565"></Representation> <Representation id="2046506" codecs="avc1.4d001f" width="1024" height="576" bandwidth="2046506"></Representation> <Representation id="3998017" codecs="avc1.640029" width="1280" height="720" bandwidth="3998017"></Representation> <Representation id="5997485" codecs="avc1.640032" width="1920" height="1080" bandwidth="5997485"></Representation> </AdaptationSet> </Period> </MPD>���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/mpd/subtitles.mpd���������������������������������������������������0000664�0000000�0000000�00000024034�14675634471�0021446�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?xml version="1.0" encoding="utf-8"?> <!-- Created with Unified Streaming Platform (version=1.10.18-20255) --> <MPD xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:mpeg:dash:schema:mpd:2011" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-DASH_schema_files/DASH-MPD.xsd" type="static" mediaPresentationDuration="PT14M48S" maxSegmentDuration="PT1M" minBufferTime="PT10S" profiles="urn:mpeg:dash:profile:isoff-live:2011"> <Period id="1" duration="PT14M48S"> <BaseURL>dash/</BaseURL> <AdaptationSet id="1" group="1" contentType="audio" segmentAlignment="true" audioSamplingRate="48000" mimeType="audio/mp4" codecs="mp4a.40.2" startWithSAP="1"> <AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2" /> <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" /> <SegmentTemplate timescale="48000" initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash" media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash"> <SegmentTimeline> <S t="0" d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" 
/> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="96256" r="2" /> <S d="95232" /> <S d="3584" /> </SegmentTimeline> </SegmentTemplate> <Representation id="audio=128001" bandwidth="128001"> 
</Representation> </AdaptationSet> <AdaptationSet id="2" group="3" contentType="text" lang="en" mimeType="application/mp4" codecs="stpp" startWithSAP="1"> <Role schemeIdUri="urn:mpeg:dash:role:2011" value="subtitle" /> <SegmentTemplate timescale="1000" initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash" media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash"> <SegmentTimeline> <S t="0" d="60000" r="9" /> <S d="24000" /> </SegmentTimeline> </SegmentTemplate> <Representation id="textstream_eng=1000" bandwidth="1000"> </Representation> </AdaptationSet> <AdaptationSet id="3" group="2" contentType="video" par="960:409" minBandwidth="100000" maxBandwidth="4482000" maxWidth="1689" maxHeight="720" segmentAlignment="true" mimeType="video/mp4" codecs="avc1.4D401F" startWithSAP="1"> <Role schemeIdUri="urn:mpeg:dash:role:2011" value="main" /> <SegmentTemplate timescale="12288" initialization="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$.dash" media="3144-kZT4LWMQw6Rh7Kpd-$RepresentationID$-$Time$.dash"> <SegmentTimeline> <S t="0" d="24576" r="443" /> </SegmentTimeline> </SegmentTemplate> <Representation id="video=100000" bandwidth="100000" width="336" height="144" sar="2880:2863" scanType="progressive"> </Representation> <Representation id="video=326000" bandwidth="326000" width="562" height="240" sar="115200:114929" scanType="progressive"> </Representation> <Representation id="video=698000" bandwidth="698000" width="844" height="360" sar="86400:86299" scanType="progressive"> </Representation> <Representation id="video=1493000" bandwidth="1493000" width="1126" height="480" sar="230400:230267" scanType="progressive"> </Representation> <Representation id="video=4482000" bandwidth="4482000" width="1688" height="720" sar="86400:86299" scanType="progressive"> </Representation> </AdaptationSet> </Period> </MPD>
yt-dlp-2024.09.27/test/testdata/mpd/unfragmented.mpd
<?xml version="1.0" encoding="UTF-8" standalone="yes"?> <MPD mediaPresentationDuration="PT54.915S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-on-demand:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011"> <Period duration="PT54.915S"> <AdaptationSet segmentAlignment="true" subsegmentAlignment="true" subsegmentStartsWithSAP="1"> <Representation bandwidth="804261" codecs="avc1.4d401e" frameRate="30" height="360" id="VIDEO-1" mimeType="video/mp4" startWithSAP="1" width="360"> <BaseURL>DASH_360</BaseURL> <SegmentBase indexRange="915-1114" indexRangeExact="true"> <Initialization range="0-914"/> </SegmentBase> </Representation> <Representation bandwidth="608000" codecs="avc1.4d401e"
frameRate="30" height="240" id="VIDEO-2" mimeType="video/mp4" startWithSAP="1" width="240"> <BaseURL>DASH_240</BaseURL> <SegmentBase indexRange="913-1112" indexRangeExact="true"> <Initialization range="0-912"/> </SegmentBase> </Representation> </AdaptationSet> <AdaptationSet> <Representation audioSamplingRate="48000" bandwidth="129870" codecs="mp4a.40.2" id="AUDIO-1" mimeType="audio/mp4" startWithSAP="1"> <AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/> <BaseURL>audio</BaseURL> <SegmentBase indexRange="832-1007" indexRangeExact="true"> <Initialization range="0-831"/> </SegmentBase> </Representation> </AdaptationSet> </Period> </MPD> ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/mpd/urls_only.mpd���������������������������������������������������0000664�0000000�0000000�00000053546�14675634471�0021470�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?xml version="1.0" ?> <MPD maxSegmentDuration="PT0H0M10.000S" mediaPresentationDuration="PT0H4M1.728S" minBufferTime="PT1.500S" profiles="urn:mpeg:dash:profile:isoff-main:2011" type="static" xmlns="urn:mpeg:dash:schema:mpd:2011"> <Period duration="PT0H4M1.728S"> <AdaptationSet bitstreamSwitching="true" lang="und" maxHeight="1080" maxWidth="1920" par="16:9" segmentAlignment="true"> <ContentComponent contentType="video" id="1"/> <Representation audioSamplingRate="44100" bandwidth="200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="144" id="h264_aac_144p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="256"> <SegmentList duration="10000" timescale="1000"> <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/init/432f65a0.mp4"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/0/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/1/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/2/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/3/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/4/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/5/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/6/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/7/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/8/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/9/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/10/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/11/432f65a0.m4s"/> <SegmentURL 
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/12/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/13/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/14/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/15/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/16/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/17/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/18/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/19/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/20/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/21/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/22/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/23/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_144p_m4s/24/432f65a0.m4s"/> </SegmentList> </Representation> <Representation audioSamplingRate="44100" bandwidth="400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="240" id="h264_aac_240p_m4s" mimeType="video/mp4" sar="160:159" startWithSAP="1" width="424"> <SegmentList duration="10000" timescale="1000"> <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/init/432f65a0.mp4"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/0/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/1/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/2/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/3/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/4/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/5/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/6/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/7/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/8/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/9/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/10/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/11/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/12/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/13/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/14/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/15/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/16/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/17/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/18/432f65a0.m4s"/> <SegmentURL 
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/19/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/20/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/21/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/22/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/23/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_240p_m4s/24/432f65a0.m4s"/> </SegmentList> </Representation> <Representation audioSamplingRate="44100" bandwidth="800000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="360" id="h264_aac_360p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="640"> <SegmentList duration="10000" timescale="1000"> <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/init/432f65a0.mp4"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/0/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/1/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/2/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/3/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/4/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/5/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/6/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/7/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/8/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/9/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/10/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/11/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/12/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/13/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/14/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/15/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/16/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/17/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/18/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/19/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/20/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/21/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/22/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/23/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_360p_m4s/24/432f65a0.m4s"/> </SegmentList> </Representation> <Representation audioSamplingRate="44100" bandwidth="1200000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" 
height="480" id="h264_aac_480p_m4s" mimeType="video/mp4" sar="320:321" startWithSAP="1" width="856"> <SegmentList duration="10000" timescale="1000"> <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/init/432f65a0.mp4"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/0/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/1/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/2/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/3/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/4/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/5/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/6/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/7/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/8/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/9/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/10/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/11/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/12/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/13/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/14/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/15/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/16/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/17/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/18/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/19/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/20/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/21/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/22/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/23/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_480p_m4s/24/432f65a0.m4s"/> </SegmentList> </Representation> <Representation audioSamplingRate="44100" bandwidth="1600000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="576" id="h264_aac_576p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1024"> <SegmentList duration="10000" timescale="1000"> <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/init/432f65a0.mp4"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/0/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/1/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/2/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/3/432f65a0.m4s"/> <SegmentURL 
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/4/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/5/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/6/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/7/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/8/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/9/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/10/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/11/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/12/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/13/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/14/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/15/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/16/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/17/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/18/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/19/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/20/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/21/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/22/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/23/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_576p_m4s/24/432f65a0.m4s"/> </SegmentList> </Representation> <Representation audioSamplingRate="44100" bandwidth="2400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="720" id="h264_aac_720p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1280"> <SegmentList duration="10000" timescale="1000"> <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/init/432f65a0.mp4"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/0/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/1/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/2/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/3/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/4/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/5/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/6/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/7/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/8/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/9/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/10/432f65a0.m4s"/> <SegmentURL 
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/11/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/12/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/13/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/14/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/15/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/16/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/17/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/18/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/19/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/20/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/21/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/22/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/23/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_720p_m4s/24/432f65a0.m4s"/> </SegmentList> </Representation> <Representation audioSamplingRate="44100" bandwidth="4400000" codecs="avc3.42c01e,mp4a.40.2" frameRate="25" height="1080" id="h264_aac_1080p_m4s" mimeType="video/mp4" sar="1:1" startWithSAP="1" width="1920"> <SegmentList duration="10000" timescale="1000"> <Initialization sourceURL="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/init/432f65a0.mp4"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/0/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/1/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/2/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/3/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/4/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/5/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/6/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/7/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/8/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/9/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/10/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/11/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/12/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/13/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/14/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/15/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/16/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/17/432f65a0.m4s"/> <SegmentURL 
media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/18/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/19/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/20/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/21/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/22/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/23/432f65a0.m4s"/> <SegmentURL media="../vd_5999c902ea707c67d8e267a9_1503250723/h264_aac_1080p_m4s/24/432f65a0.m4s"/> </SegmentList> </Representation> </AdaptationSet> </Period> </MPD> ����������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/thumbnails/���������������������������������������������������������0000775�0000000�0000000�00000000000�14675634471�0020311�5����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/thumbnails/foo %d bar/����������������������������������������������0000775�0000000�0000000�00000000000�14675634471�0022072�5����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/thumbnails/foo %d bar/foo_%d.webp�����������������������������������0000664�0000000�0000000�00000007530�14675634471�0024111�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������RIFFP��WEBPVP8 D��a�*5>HL%#"!I gnvUÿ1\}p][Gǭ_]7?akiyyyU_L#�?;#D{�pBP?C?|7Googt=j/ >ՏucXGV>ՏucXGV>ՏucXGV>ՏucXGV>ՏucXGV>ՏucXGV>ՏucXGV>пIZ}2I6-;a?竬r`P%īRѾ9l*̊vvVh,7iѺK,εo`bTLK��2u`;fiF͘JB/`y=UeF/;QQ nJ U0-PZs-UKر+]Rz@!<DaG8?`}f'q!m|Ļ4kuɜ10!FUE3E= ߗLBbTxdJqbږȖ cgi.݊Kt8Fs=![Di߁ (q룥K&YozsM%b9f^�0>'/4*P̎@|:zdPkˑ@-H!Z:}wh:}h:}h:}h:}h:}h:}h:����������d$gRAZwA*((*v`5bXvysW=ko_0f/xU~Wr'[rX~e,EJ_Kne%4Z@@Wbt˻$| ~D흗mk'/ZROUh;Ƨ]й3xO<&جDF,]b:cϚ:|Ä ҖP/aHUS* f#} _d˹7БN,Jzf 6pl,Vk7g:$jШT3sU9ˣ뙙Gs_O :F QFCkn>߿xsW~o%HAI5Ar+,Z2�.E $_P:6q�DQjTOǴeӰsݍ:xYbub:fH1{f:QپPrO@HXG޲sDONqEjC9=.*^f-~,&nێ9P[6 )ZgZoF<Y1Z]l5~ȍń{uT[}'gLiwV o5C[ܙͶcZbYthduE@53"yb)]2pxL?twa>x!M5&P^YӳWbX-D#&*kwEq@y~Oa tzL'Wx-* Xt A. x}:3Iܚ y 3y0[Us`$G͏A0Ipٔ}<<!?|)soUřȬ)yg)hyŲoó=u^}ly#\?Ai߸O`eq? 
W&UcS?a)zW:9lm[^:9 g *y$ER>=8 FT{=yn_bG?R]m |f/]Lj[{T3fPg,vK_ӻ(x U4^U!mw9ŴXLzGYU��V*dդ«%4Ŷ/K-9-$|3�{=\U G}{dy[cQڑOpTn{;ͧ.B0/J W*pnlK%0=DiY㊐$7oT\G}*}NUyB^8ۧy,VdU!ռNt3%#_{Nlqmr|DϧR|>[\4viBi)zgex{jG(,v00 ʺ{D]eD(BJK�)�wwzm\).3gBAzVF<@b h6\g ׻A͛SR?,] kEDe;70;UGS@:F H%=4EhhE=~_YOGNjʻRD(8!vʾLf{\?aEȯDxZ7[8mRwBg+:hu_C L(%W9FfMki\hK t.f:Soۯ$;2>.4M=n q7\}o[ujw~I'|!ep9MsSR<X<% Gr^{DZ+k(q=--#DPEPeMiƹ\N̫XU˸}h 60sQmrKvU�< ,-vP) #|EXLQ\06Û&F#f>Ʈżw2ҋz jO^ʠ6B{_̜)vn@pQ|!y"MVaN|3LB8ᖲ=vkMG]ӑ# Fdtgh,-Xz<ä҄jv"%e:� Fs>dT݅9z39鎁)5+ÖӜRsHBj H,<G^ sL`qGdrk,~Tjl1 DKȪ0긱D ~rǾϭlae'XҭgA�D^3L0wfEdVcvS$91VESٶF9Zw]ȟMOt0 g1X^RUiXAoCy:+7dkHP3v;jWS7k\[-Jh#LVTS -.S%?c@A?BqsXQ0`<hu>`7TtY/yF�Rvc 3IeUӐC9%XѤ1<RlAN\6Y5P98%BM<y~s�DqQ"ӵMM;;qA)Xi-W^x�):O m:!21JMӷ2}X |V汝OċP <CY GWJh:_^zR 9t4EsYVmإ*?u^XÃ~d^TQ;x:y HܤOY *i<-0(-c ih݌]F73vTvɑwB,˓$!r?lm7jC"j XQaUؗIí{^YITg[>e<Ёwi<*!DxO¦h`h}u0.MEzi:f~a(ݴSbٛ􀚐a}U@Ⱥ J}Ǎ!ly҄AM܃xo2S2h^^Q󀙏;lQ͜|㟱QM 7˛DrϐL ^LX!Azo:.h^C2V 2a'pȡ~!SN]!J;M:n)orXGXe& /czn ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/xspf/���������������������������������������������������������������0000775�0000000�0000000�00000000000�14675634471�0017123�5����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/test/testdata/xspf/foo_xspf.xspf��������������������������������������������������0000664�0000000�0000000�00000002551�14675634471�0021653�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?xml version="1.0" encoding="UTF-8"?> <playlist version="1" xmlns="http://xspf.org/ns/0/"> <date>2018-03-09T18:01:43Z</date> <trackList> <track> <location>cd1/track%201.mp3</location> <title>Pandemonium Foilverb Visit http://bigbrother404.bandcamp.com Pandemonium EP 1 202416 ../%E3%83%88%E3%83%A9%E3%83%83%E3%82%AF%E3%80%80%EF%BC%92.mp3 Final Cartridge (Nichico Twelve Remix) Visit http://bigbrother404.bandcamp.com Foilverb Pandemonium EP 2 255857 track3.mp3 https://example.com/track3.mp3 Rebuilding Nightingale Visit http://bigbrother404.bandcamp.com Foilverb Pandemonium EP 3 287915 yt-dlp-2024.09.27/test/testdata/yt_dlp_plugins/000077500000000000000000000000001467563447100211775ustar00rootroot00000000000000yt-dlp-2024.09.27/test/testdata/yt_dlp_plugins/extractor/000077500000000000000000000000001467563447100232125ustar00rootroot00000000000000yt-dlp-2024.09.27/test/testdata/yt_dlp_plugins/extractor/_ignore.py000066400000000000000000000001421467563447100252030ustar00rootroot00000000000000from yt_dlp.extractor.common import InfoExtractor class IgnorePluginIE(InfoExtractor): pass yt-dlp-2024.09.27/test/testdata/yt_dlp_plugins/extractor/ignore.py000066400000000000000000000002671467563447100250540ustar00rootroot00000000000000from yt_dlp.extractor.common import InfoExtractor class IgnoreNotInAllPluginIE(InfoExtractor): pass class InAllPluginIE(InfoExtractor): pass __all__ = 
yt-dlp-2024.09.27/test/testdata/yt_dlp_plugins/extractor/normal.py
from yt_dlp.extractor.common import InfoExtractor class NormalPluginIE(InfoExtractor): pass class _IgnoreUnderscorePluginIE(InfoExtractor): pass
yt-dlp-2024.09.27/test/testdata/yt_dlp_plugins/postprocessor/normal.py
from yt_dlp.postprocessor.common import PostProcessor class NormalPluginPP(PostProcessor): pass
yt-dlp-2024.09.27/test/testdata/zipped_plugins/yt_dlp_plugins/extractor/zipped.py
from yt_dlp.extractor.common import InfoExtractor class ZippedPluginIE(InfoExtractor): pass
yt-dlp-2024.09.27/test/testdata/zipped_plugins/yt_dlp_plugins/postprocessor/zipped.py
from yt_dlp.postprocessor.common import PostProcessor class ZippedPluginPP(PostProcessor): pass
yt-dlp-2024.09.27/yt-dlp.cmd
@py -Werror -Xdev "%~dp0yt_dlp\__main__.py" %*
yt-dlp-2024.09.27/yt-dlp.sh
#!/usr/bin/env sh exec "${PYTHON:-python3}" -Werror -Xdev "$(dirname "$(realpath "$0")")/yt_dlp/__main__.py" "$@"
yt-dlp-2024.09.27/yt_dlp/YoutubeDL.py
import collections import contextlib import copy import datetime as dt import errno import fileinput import functools import http.cookiejar import io import itertools import json import locale import operator import os import random import re import shutil import string import subprocess import sys import tempfile import time import tokenize import traceback import unicodedata from .cache import Cache from .compat import urllib # isort: split from .compat import compat_os_name, urllib_req_to_req from .cookies import LenientSimpleCookie, load_cookies from .downloader import FFmpegFD, get_suitable_downloader, shorten_protocol_name from .downloader.rtmp import rtmpdump_version from .extractor import gen_extractor_classes, get_info_extractor from .extractor.common import UnsupportedURLIE from .extractor.openload import PhantomJSwrapper from .minicurses import format_text from .networking import HEADRequest, Request, RequestDirector from .networking.common import _REQUEST_HANDLERS, _RH_PREFERENCES from .networking.exceptions import ( HTTPError, NoSupportingHandlers, RequestError, SSLError, network_exceptions, ) from .networking.impersonate import ImpersonateRequestHandler
from .plugins import directories as plugin_directories from .postprocessor import _PLUGIN_CLASSES as plugin_pps from .postprocessor import ( EmbedThumbnailPP, FFmpegFixupDuplicateMoovPP, FFmpegFixupDurationPP, FFmpegFixupM3u8PP, FFmpegFixupM4aPP, FFmpegFixupStretchedPP, FFmpegFixupTimestampPP, FFmpegMergerPP, FFmpegPostProcessor, FFmpegVideoConvertorPP, MoveFilesAfterDownloadPP, get_postprocessor, ) from .postprocessor.ffmpeg import resolve_mapping as resolve_recode_mapping from .update import ( REPOSITORY, _get_system_deprecation, _make_label, current_git_head, detect_variant, ) from .utils import ( DEFAULT_OUTTMPL, IDENTITY, LINK_TEMPLATES, MEDIA_EXTENSIONS, NO_DEFAULT, NUMBER_RE, OUTTMPL_TYPES, POSTPROCESS_WHEN, STR_FORMAT_RE_TMPL, STR_FORMAT_TYPES, ContentTooShortError, DateRange, DownloadCancelled, DownloadError, EntryNotInPlaylist, ExistingVideoReached, ExtractorError, FormatSorter, GeoRestrictedError, ISO3166Utils, LazyList, MaxDownloadsReached, Namespace, PagedList, PlaylistEntries, Popen, PostProcessingError, ReExtractInfo, RejectedVideoReached, SameFileError, UnavailableVideoError, UserNotLive, YoutubeDLError, age_restricted, bug_reports_message, date_from_str, deprecation_warning, determine_ext, determine_protocol, encode_compat_str, encodeFilename, escapeHTML, expand_path, extract_basic_auth, filter_dict, float_or_none, format_bytes, format_decimal_suffix, format_field, formatSeconds, get_compatible_ext, get_domain, int_or_none, iri_to_uri, is_path_like, join_nonempty, locked_file, make_archive_id, make_dir, number_of_digits, orderedSet, orderedSet_from_options, parse_filesize, preferredencoding, prepend_extension, remove_terminal_sequences, render_table, replace_extension, sanitize_filename, sanitize_path, sanitize_url, shell_quote, str_or_none, strftime_or_none, subtitles_filename, supports_terminal_sequences, system_identifier, filesize_from_tbr, timetuple_from_msec, to_high_limit_path, traverse_obj, try_call, try_get, url_basename, variadic, version_tuple, windows_enable_vt_mode, write_json_file, write_string, ) from .utils._utils import _UnsafeExtensionError, _YDLLogger from .utils.networking import ( HTTPHeaderDict, clean_headers, clean_proxies, std_headers, ) from .version import CHANNEL, ORIGIN, RELEASE_GIT_HEAD, VARIANT, __version__ if compat_os_name == 'nt': import ctypes def _catch_unsafe_extension_error(func): @functools.wraps(func) def wrapper(self, *args, **kwargs): try: return func(self, *args, **kwargs) except _UnsafeExtensionError as error: self.report_error( f'The extracted extension ({error.extension!r}) is unusual ' 'and will be skipped for safety reasons. ' f'If you believe this is an error{bug_reports_message(",")}') return wrapper class YoutubeDL: """YoutubeDL class. YoutubeDL objects are the ones responsible for downloading the actual video file and writing it to disk if the user has requested it, among some other tasks. In most cases there should be one per program. Given a video URL, the downloader doesn't know how to extract all the needed information (that is the InfoExtractors' job), so it has to pass the URL to one of them. For this, YoutubeDL objects have a method that allows InfoExtractors to be registered in a given order. When it is passed a URL, the YoutubeDL object hands it to the first InfoExtractor it finds that reports being able to handle it. The InfoExtractor extracts all the information about the video or videos the URL refers to, and YoutubeDL processes the extracted information, possibly using a File Downloader to download the video. YoutubeDL objects accept a lot of parameters. In order not to saturate the object constructor with arguments, it receives a dictionary of options instead. These options are available through the params attribute for the InfoExtractors to use. The YoutubeDL also registers itself as the downloader in charge of the InfoExtractors that are added to it, so this is a "mutual registration".
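    Example (a minimal usage sketch; the URL and parameters are only
    illustrative, not defaults of the class):

        from yt_dlp import YoutubeDL

        with YoutubeDL({'format': 'best', 'quiet': True}) as ydl:
            info = ydl.extract_info('https://www.youtube.com/watch?v=BaW_jenozKc', download=False)
            print(info.get('title'))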
Available options: username: Username for authentication purposes. password: Password for authentication purposes. videopassword: Password for accessing a video. ap_mso: Adobe Pass multiple-system operator identifier. ap_username: Multiple-system operator account username. ap_password: Multiple-system operator account password. usenetrc: Use netrc for authentication instead. netrc_location: Location of the netrc file. Defaults to ~/.netrc. netrc_cmd: Use a shell command to get credentials verbose: Print additional info to stdout. quiet: Do not print messages to stdout. no_warnings: Do not print out anything for warnings. forceprint: A dict with keys WHEN mapped to a list of templates to print to stdout. The allowed keys are video or any of the items in utils.POSTPROCESS_WHEN. For compatibility, a single list is also accepted print_to_file: A dict with keys WHEN (same as forceprint) mapped to a list of tuples with (template, filename) forcejson: Force printing info_dict as JSON. dump_single_json: Force printing the info_dict of the whole playlist (or video) as a single JSON line. force_write_download_archive: Force writing download archive regardless of 'skip_download' or 'simulate'. simulate: Do not download the video files. If unset (or None), simulate only if listsubtitles, listformats or list_thumbnails is used format: Video format code. See "FORMAT SELECTION" for more details. You can also pass a function. The function takes 'ctx' as argument and returns the formats to download. See "build_format_selector" for an implementation allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded. ignore_no_formats_error: Ignore "No video formats" error. Useful for extracting metadata even if the video is not actually available for download (experimental) format_sort: A list of fields by which to sort the video formats. See "Sorting Formats" for more details. format_sort_force: Force the given format_sort. See "Sorting Formats" for more details. prefer_free_formats: Whether to prefer video formats with free containers over non-free ones of the same quality. allow_multiple_video_streams: Allow multiple video streams to be merged into a single file allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file check_formats: Whether to test if the formats are downloadable. Can be True (check all), False (check none), 'selected' (check selected formats), or None (check only if requested by extractor) paths: Dictionary of output paths. The allowed keys are 'home', 'temp' and the keys of OUTTMPL_TYPES (in utils/_utils.py) outtmpl: Dictionary of templates for output names. Allowed keys are 'default' and the keys of OUTTMPL_TYPES (in utils/_utils.py). For compatibility with youtube-dl, a single string can also be used outtmpl_na_placeholder: Placeholder for unavailable meta fields.
restrictfilenames: Do not allow "&" and spaces in file names trim_file_name: Limit length of filename (extension excluded) windowsfilenames: Force the filenames to be Windows-compatible ignoreerrors: Do not stop on download/postprocessing errors. Can be 'only_download' to ignore only download errors. Default is 'only_download' for CLI, but False for API skip_playlist_after_errors: Number of allowed failures until the rest of the playlist is skipped allowed_extractors: List of regexes to match against extractor names that are allowed overwrites: Overwrite all video and metadata files if True, overwrite only non-video files if None and don't overwrite any file if False playlist_items: Specific indices of playlist to download. playlistrandom: Download playlist items in random order. lazy_playlist: Process playlist entries as they are received. matchtitle: Download only matching titles. rejecttitle: Reject downloads for matching titles. logger: Log messages to a logging.Logger instance. logtostderr: Print everything to stderr instead of stdout. consoletitle: Display progress in console window's titlebar. writedescription: Write the video description to a .description file writeinfojson: Write the video metadata to a .info.json file clean_infojson: Remove internal metadata from the infojson getcomments: Extract video comments. These will not be written to disk unless writeinfojson is also given writeannotations: Write the video annotations to a .annotations.xml file writethumbnail: Write the thumbnail image to a file allow_playlist_files: Whether to write playlists' description, infojson, etc. also to disk when using the 'write*' options write_all_thumbnails: Write all thumbnail formats to files writelink: Write an internet shortcut file, depending on the current platform (.url/.webloc/.desktop) writeurllink: Write a Windows internet shortcut file (.url) writewebloclink: Write a macOS internet shortcut file (.webloc) writedesktoplink: Write a Linux internet shortcut file (.desktop) writesubtitles: Write the video subtitles to a file writeautomaticsub: Write the automatically generated subtitles to a file listsubtitles: Lists all available subtitles for the video subtitlesformat: The format code for subtitles subtitleslangs: List of languages of the subtitles to download (can be regex). The list may contain "all" to refer to all the available subtitles. The language can be prefixed with a "-" to exclude it from the requested languages, e.g. ['all', '-live_chat'] keepvideo: Keep the video file after post-processing daterange: A utils.DateRange object; download only if the upload_date is in the range. skip_download: Skip the actual download of the video file cachedir: Location of the cache files in the filesystem. False to disable filesystem cache. noplaylist: Download single video instead of a playlist if in doubt. age_limit: An integer representing the user's age in years. Unsuitable videos for the given age are skipped. min_views: An integer representing the minimum view count the video must have in order to not be skipped. Videos without view count information are always downloaded. None for no limit. max_views: An integer representing the maximum view count. Videos that are more popular than that are not downloaded. Videos without view count information are always downloaded. None for no limit. download_archive: A set, or the name of a file where all downloads are recorded. Videos already present in the file are not downloaded again.
break_on_existing: Stop the download process after attempting to download a file that is in the archive. break_per_url: Whether break_on_reject and break_on_existing should act on each input URL as opposed to for the entire queue cookiefile: File name or text stream from where cookies should be read and dumped to cookiesfrombrowser: A tuple containing the name of the browser, the profile name/path from where cookies are loaded, the name of the keyring, and the container name, e.g. ('chrome', ) or ('vivaldi', 'default', 'BASICTEXT') or ('firefox', 'default', None, 'Meta') legacyserverconnect: Explicitly allow HTTPS connection to servers that do not support RFC 5746 secure renegotiation nocheckcertificate: Do not verify SSL certificates client_certificate: Path to client certificate file in PEM format. May include the private key client_certificate_key: Path to private key file for client certificate client_certificate_password: Password for client certificate private key, if encrypted. If not provided and the key is encrypted, yt-dlp will ask interactively prefer_insecure: Use HTTP instead of HTTPS to retrieve information. (Only supported by some extractors) enable_file_urls: Enable file:// URLs. This is disabled by default for security reasons. http_headers: A dictionary of custom headers to be used for all requests proxy: URL of the proxy server to use geo_verification_proxy: URL of the proxy to use for IP address verification on geo-restricted sites. socket_timeout: Time to wait for unresponsive hosts, in seconds bidi_workaround: Work around buggy terminals without bidirectional text support, using fribidi debug_printtraffic: Print out sent and received HTTP traffic default_search: Prepend this string if an input URL is not valid. 'auto' for elaborate guessing encoding: Use this encoding instead of the system-specified. extract_flat: Whether to resolve and process url_results further * False: Always process. Default for API * True: Never process * 'in_playlist': Do not process inside playlist/multi_video * 'discard': Always process, but don't return the result from inside playlist/multi_video * 'discard_in_playlist': Same as "discard", but only for playlists (not multi_video). Default for CLI wait_for_video: If given, wait for scheduled streams to become available. The value should be a tuple containing the range (min_secs, max_secs) to wait between retries postprocessors: A list of dictionaries, each with an entry * key: The name of the postprocessor. See yt_dlp/postprocessor/__init__.py for a list. * when: When to run the postprocessor. Allowed values are the entries of utils.POSTPROCESS_WHEN. Assumed to be 'post_process' if not given progress_hooks: A list of functions that get called on download progress, with a dictionary with the entries * status: One of "downloading", "error", or "finished". Check this first and ignore unknown values. * info_dict: The extracted info_dict If status is one of "downloading" or "finished", the following properties may also be present: * filename: The final filename (always present) * tmpfilename: The filename we're currently writing to * downloaded_bytes: Bytes on disk * total_bytes: Size of the whole file, None if unknown * total_bytes_estimate: Guess of the eventual file size, None if unavailable. * elapsed: The number of seconds since download started. * eta: The estimated time in seconds, None if unknown * speed: The download speed in bytes/second, None if unknown * fragment_index: The counter of the currently downloaded video fragment.
* fragment_count: The number of fragments (= individual files that will be merged) Progress hooks are guaranteed to be called at least once (with status "finished") if the download is successful. postprocessor_hooks: A list of functions that get called on postprocessing progress, with a dictionary with the entries * status: One of "started", "processing", or "finished". Check this first and ignore unknown values. * postprocessor: Name of the postprocessor * info_dict: The extracted info_dict Progress hooks are guaranteed to be called at least twice (with status "started" and "finished") if the processing is successful. merge_output_format: "/" separated list of extensions to use when merging formats. final_ext: Expected final extension; used to detect when the file was already downloaded and converted fixup: Automatically correct known faults of the file. One of: - "never": do nothing - "warn": only emit a warning - "detect_or_warn": check whether we can do anything about it, warn otherwise (default) source_address: Client-side IP address to bind to. impersonate: Client to impersonate for requests. An ImpersonateTarget (from yt_dlp.networking.impersonate) sleep_interval_requests: Number of seconds to sleep between requests during extraction sleep_interval: Number of seconds to sleep before each download when used alone or a lower bound of a range for randomized sleep before each download (minimum possible number of seconds to sleep) when used along with max_sleep_interval. max_sleep_interval:Upper bound of a range for randomized sleep before each download (maximum possible number of seconds to sleep). Must only be used along with sleep_interval. Actual sleep time will be a random float from range [sleep_interval; max_sleep_interval]. sleep_interval_subtitles: Number of seconds to sleep before each subtitle download listformats: Print an overview of available video formats and exit. list_thumbnails: Print a table of all thumbnails and exit. match_filter: A function that gets called for every video with the signature (info_dict, *, incomplete: bool) -> Optional[str] For backward compatibility with youtube-dl, the signature (info_dict) -> Optional[str] is also allowed. - If it returns a message, the video is ignored. - If it returns None, the video is downloaded. - If it returns utils.NO_DEFAULT, the user is interactively asked whether to download the video. - Raise utils.DownloadCancelled(msg) to abort remaining downloads when a video is rejected. match_filter_func in utils/_utils.py is one example for this. color: A Dictionary with output stream names as keys and their respective color policy as values. Can also just be a single color policy, in which case it applies to all outputs. Valid stream names are 'stdout' and 'stderr'. Valid color policies are one of 'always', 'auto', 'no_color', 'never', 'auto-tty' or 'no_color-tty'. geo_bypass: Bypass geographic restriction via faking X-Forwarded-For HTTP header geo_bypass_country: Two-letter ISO 3166-2 country code that will be used for explicit geographic restriction bypassing via faking X-Forwarded-For HTTP header geo_bypass_ip_block: IP range in CIDR notation that will be used similarly to geo_bypass_country external_downloader: A dictionary of protocol keys and the executable of the external downloader to use for it. The allowed protocols are default|http|ftp|m3u8|dash|rtsp|rtmp|mms. Set the value to 'native' to use the native downloader compat_opts: Compatibility options. See "Differences in default behavior". 
The following options do not work when used through the API: filename, abort-on-error, multistreams, no-live-chat, format-sort, no-clean-infojson, no-playlist-metafiles, no-keep-subs, no-attach-info-json, allow-unsafe-ext. Refer to __init__.py for their implementation progress_template: Dictionary of templates for progress outputs. Allowed keys are 'download', 'postprocess', 'download-title' (console title) and 'postprocess-title'. The template is mapped onto a dictionary with keys 'progress' and 'info' retry_sleep_functions: Dictionary of functions that take the number of attempts as argument and return the time to sleep in seconds. Allowed keys are 'http', 'fragment', 'file_access' download_ranges: A callback function that gets called for every video with the signature (info_dict, ydl) -> Iterable[Section]. Only the returned sections will be downloaded. Each Section is a dict with the following keys: * start_time: Start time of the section in seconds * end_time: End time of the section in seconds * title: Section title (Optional) * index: Section number (Optional) force_keyframes_at_cuts: Re-encode the video when downloading ranges to get precise cuts noprogress: Do not print the progress bar live_from_start: Whether to download livestream videos from the start The following parameters are not used by YoutubeDL itself; they are used by the downloader (see yt_dlp/downloader/common.py): nopart, updatetime, buffersize, ratelimit, throttledratelimit, min_filesize, max_filesize, test, noresizebuffer, retries, file_access_retries, fragment_retries, continuedl, xattr_set_filesize, hls_use_mpegts, http_chunk_size, external_downloader_args, concurrent_fragment_downloads, progress_delta. The following options are used by the post processors: ffmpeg_location: Location of the ffmpeg/avconv binary; either the path to the binary or its containing directory. postprocessor_args: A dictionary of postprocessor/executable keys (in lower case) and a list of additional command-line arguments for the postprocessor/executable. The dict can also have "PP+EXE" keys which are used when the given exe is used by the given PP. Use 'default' as the name for arguments to be passed to all PPs. For compatibility with youtube-dl, a single list of args can also be used The following options are used by the extractors: extractor_retries: Number of times to retry for known errors (default: 3) dynamic_mpd: Whether to process dynamic DASH manifests (default: True) hls_split_discontinuity: Split HLS playlists to different formats at discontinuities such as ad breaks (default: False) extractor_args: A dictionary of arguments to be passed to the extractors. See "EXTRACTOR ARGUMENTS" for details. E.g. {'youtube': {'skip': ['dash', 'hls']}} mark_watched: Mark videos watched (even with --simulate). Only for YouTube The following options are deprecated and may be removed in the future: break_on_reject: Stop the download process when encountering a video that has been filtered out. - `raise DownloadCancelled(msg)` in match_filter instead force_generic_extractor: Force downloader to use the generic extractor - Use allowed_extractors = ['generic', 'default'] playliststart: - Use playlist_items Playlist item to start at. playlistend: - Use playlist_items Playlist item to end at. playlistreverse: - Use playlist_items Download playlist items in reverse order. forceurl: - Use forceprint Force printing final URL. forcetitle: - Use forceprint Force printing title. forceid: - Use forceprint Force printing ID.
forcethumbnail: - Use forceprint Force printing thumbnail URL. forcedescription: - Use forceprint Force printing description. forcefilename: - Use forceprint Force printing final filename. forceduration: - Use forceprint Force printing duration. allsubtitles: - Use subtitleslangs = ['all'] Downloads all the subtitles of the video (requires writesubtitles or writeautomaticsub) include_ads: - Doesn't work Download ads as well call_home: - Not implemented Boolean, true iff we are allowed to contact the yt-dlp servers for debugging. post_hooks: - Register a custom postprocessor A list of functions that get called as the final step for each video file, after all postprocessors have been called. The filename will be passed as the only argument. hls_prefer_native: - Use external_downloader = {'m3u8': 'native'} or {'m3u8': 'ffmpeg'}. Use the native HLS downloader instead of ffmpeg/avconv if True, otherwise use ffmpeg/avconv if False, otherwise use downloader suggested by extractor if None. prefer_ffmpeg: - avconv support is deprecated If False, use avconv instead of ffmpeg if both are available, otherwise prefer ffmpeg. youtube_include_dash_manifest: - Use extractor_args If True (default), DASH manifests and related data will be downloaded and processed by extractor. You can reduce network I/O by disabling it if you don't care about DASH. (only for youtube) youtube_include_hls_manifest: - Use extractor_args If True (default), HLS manifests and related data will be downloaded and processed by extractor. You can reduce network I/O by disabling it if you don't care about HLS. (only for youtube) no_color: Same as `color='no_color'` no_overwrites: Same as `overwrites=False` """ _NUMERIC_FIELDS = { 'width', 'height', 'asr', 'audio_channels', 'fps', 'tbr', 'abr', 'vbr', 'filesize', 'filesize_approx', 'timestamp', 'release_timestamp', 'duration', 'view_count', 'like_count', 'dislike_count', 'repost_count', 'average_rating', 'comment_count', 'age_limit', 'start_time', 'end_time', 'chapter_number', 'season_number', 'episode_number', 'track_number', 'disc_number', 'release_year', } _format_fields = { # NB: Keep in sync with the docstring of extractor/common.py 'url', 'manifest_url', 'manifest_stream_number', 'ext', 'format', 'format_id', 'format_note', 'width', 'height', 'aspect_ratio', 'resolution', 'dynamic_range', 'tbr', 'abr', 'acodec', 'asr', 'audio_channels', 'vbr', 'fps', 'vcodec', 'container', 'filesize', 'filesize_approx', 'rows', 'columns', 'player_url', 'protocol', 'fragment_base_url', 'fragments', 'is_from_start', 'is_dash_periods', 'request_data', 'preference', 'language', 'language_preference', 'quality', 'source_preference', 'cookies', 'http_headers', 'stretched_ratio', 'no_resume', 'has_drm', 'extra_param_to_segment_url', 'extra_param_to_key_url', 'hls_aes', 'downloader_options', 'page_url', 'app', 'play_path', 'tc_url', 'flash_version', 'rtmp_live', 'rtmp_conn', 'rtmp_protocol', 'rtmp_real_time', } _deprecated_multivalue_fields = { 'album_artist': 'album_artists', 'artist': 'artists', 'composer': 'composers', 'creator': 'creators', 'genre': 'genres', } _format_selection_exts = { 'audio': set(MEDIA_EXTENSIONS.common_audio), 'video': {*MEDIA_EXTENSIONS.common_video, '3gp'}, 'storyboards': set(MEDIA_EXTENSIONS.storyboards), } def __init__(self, params=None, auto_init=True): """Create a FileDownloader object with the given options. @param auto_init Whether to load the default extractors and print header (if verbose). 
Set to 'no_verbose_header' to not print the header """ if params is None: params = {} self.params = params self._ies = {} self._ies_instances = {} self._pps = {k: [] for k in POSTPROCESS_WHEN} self._printed_messages = set() self._first_webpage_request = True self._post_hooks = [] self._progress_hooks = [] self._postprocessor_hooks = [] self._download_retcode = 0 self._num_downloads = 0 self._num_videos = 0 self._playlist_level = 0 self._playlist_urls = set() self.cache = Cache(self) self.__header_cookies = [] stdout = sys.stderr if self.params.get('logtostderr') else sys.stdout self._out_files = Namespace( out=stdout, error=sys.stderr, screen=sys.stderr if self.params.get('quiet') else stdout, console=None if compat_os_name == 'nt' else next( filter(supports_terminal_sequences, (sys.stderr, sys.stdout)), None), ) try: windows_enable_vt_mode() except Exception as e: self.write_debug(f'Failed to enable VT mode: {e}') if self.params.get('no_color'): if self.params.get('color') is not None: self.params.setdefault('_warnings', []).append( 'Overwriting params from "color" with "no_color"') self.params['color'] = 'no_color' term_allow_color = os.getenv('TERM', '').lower() != 'dumb' base_no_color = bool(os.getenv('NO_COLOR')) def process_color_policy(stream): stream_name = {sys.stdout: 'stdout', sys.stderr: 'stderr'}[stream] policy = traverse_obj(self.params, ('color', (stream_name, None), {str}, any)) or 'auto' if policy in ('auto', 'auto-tty', 'no_color-tty'): no_color = base_no_color if policy.endswith('tty'): no_color = policy.startswith('no_color') if term_allow_color and supports_terminal_sequences(stream): return 'no_color' if no_color else True return False assert policy in ('always', 'never', 'no_color'), policy return {'always': True, 'never': False}.get(policy, policy) self._allow_colors = Namespace(**{ name: process_color_policy(stream) for name, stream in self._out_files.items_ if name != 'console' }) system_deprecation = _get_system_deprecation() if system_deprecation: self.deprecated_feature(system_deprecation.replace('\n', '\n ')) if self.params.get('allow_unplayable_formats'): self.report_warning( f'You have asked for {self._format_err("UNPLAYABLE", self.Styles.EMPHASIS)} formats to be listed/downloaded. ' 'This is a developer option intended for debugging. \n' ' If you experience any issues while using this option, ' f'{self._format_err("DO NOT", self.Styles.ERROR)} open a bug report') if self.params.get('bidi_workaround', False): try: import pty master, slave = pty.openpty() width = shutil.get_terminal_size().columns width_args = [] if width is None else ['-w', str(width)] sp_kwargs = {'stdin': subprocess.PIPE, 'stdout': slave, 'stderr': self._out_files.error} try: self._output_process = Popen(['bidiv', *width_args], **sp_kwargs) except OSError: self._output_process = Popen(['fribidi', '-c', 'UTF-8', *width_args], **sp_kwargs) self._output_channel = os.fdopen(master, 'rb') except OSError as ose: if ose.errno == errno.ENOENT: self.report_warning( 'Could not find fribidi executable, ignoring --bidi-workaround. 
' 'Make sure that fribidi is an executable file in one of the directories in your $PATH.') else: raise self.params['compat_opts'] = set(self.params.get('compat_opts', ())) self.params['http_headers'] = HTTPHeaderDict(std_headers, self.params.get('http_headers')) self._load_cookies(self.params['http_headers'].get('Cookie')) # compat self.params['http_headers'].pop('Cookie', None) if auto_init and auto_init != 'no_verbose_header': self.print_debug_header() def check_deprecated(param, option, suggestion): if self.params.get(param) is not None: self.report_warning(f'{option} is deprecated. Use {suggestion} instead') return True return False if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'): if self.params.get('geo_verification_proxy') is None: self.params['geo_verification_proxy'] = self.params['cn_verification_proxy'] check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"') check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"') check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"') for msg in self.params.get('_warnings', []): self.report_warning(msg) for msg in self.params.get('_deprecation_warnings', []): self.deprecated_feature(msg) if impersonate_target := self.params.get('impersonate'): if not self._impersonate_target_available(impersonate_target): raise YoutubeDLError( f'Impersonate target "{impersonate_target}" is not available. ' f'Use --list-impersonate-targets to see available targets. ' f'You may be missing dependencies required to support this target.') if 'list-formats' in self.params['compat_opts']: self.params['listformats_table'] = False if 'overwrites' not in self.params and self.params.get('nooverwrites') is not None: # nooverwrites was unnecessarily changed to overwrites # in 0c3d0f51778b153f65c21906031c2e091fcfb641 # This ensures compatibility with both keys self.params['overwrites'] = not self.params['nooverwrites'] elif self.params.get('overwrites') is None: self.params.pop('overwrites', None) else: self.params['nooverwrites'] = not self.params['overwrites'] if self.params.get('simulate') is None and any(( self.params.get('list_thumbnails'), self.params.get('listformats'), self.params.get('listsubtitles'), )): self.params['simulate'] = 'list_only' self.params.setdefault('forceprint', {}) self.params.setdefault('print_to_file', {}) # Compatibility with older syntax if not isinstance(params['forceprint'], dict): self.params['forceprint'] = {'video': params['forceprint']} if auto_init: self.add_default_info_extractors() if (sys.platform != 'win32' and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and not self.params.get('restrictfilenames', False)): # Unicode filesystem API will throw errors (#1474, #13027) self.report_warning( 'Assuming --restrict-filenames since file system encoding ' 'cannot encode all characters. 
' 'Set the LC_ALL environment variable to fix this.') self.params['restrictfilenames'] = True self._parse_outtmpl() # Creating format selector here allows us to catch syntax errors before the extraction self.format_selector = ( self.params.get('format') if self.params.get('format') in (None, '-') else self.params['format'] if callable(self.params['format']) else self.build_format_selector(self.params['format'])) hooks = { 'post_hooks': self.add_post_hook, 'progress_hooks': self.add_progress_hook, 'postprocessor_hooks': self.add_postprocessor_hook, } for opt, fn in hooks.items(): for ph in self.params.get(opt, []): fn(ph) for pp_def_raw in self.params.get('postprocessors', []): pp_def = dict(pp_def_raw) when = pp_def.pop('when', 'post_process') self.add_post_processor( get_postprocessor(pp_def.pop('key'))(self, **pp_def), when=when) def preload_download_archive(fn): """Preload the archive, if any is specified""" archive = set() if fn is None: return archive elif not is_path_like(fn): return fn self.write_debug(f'Loading archive file {fn!r}') try: with locked_file(fn, 'r', encoding='utf-8') as archive_file: for line in archive_file: archive.add(line.strip()) except OSError as ioe: if ioe.errno != errno.ENOENT: raise return archive self.archive = preload_download_archive(self.params.get('download_archive')) def warn_if_short_id(self, argv): # short YouTube ID starting with dash? idxs = [ i for i, a in enumerate(argv) if re.match(r'^-[0-9A-Za-z_-]{10}$', a)] if idxs: correct_argv = ( ['yt-dlp'] + [a for i, a in enumerate(argv) if i not in idxs] + ['--'] + [argv[i] for i in idxs] ) self.report_warning( 'Long argument string detected. ' f'Use -- to separate parameters and URLs, like this:\n{shell_quote(correct_argv)}') def add_info_extractor(self, ie): """Add an InfoExtractor object to the end of the list.""" ie_key = ie.ie_key() self._ies[ie_key] = ie if not isinstance(ie, type): self._ies_instances[ie_key] = ie ie.set_downloader(self) def get_info_extractor(self, ie_key): """ Get an instance of an IE with name ie_key. It will first try to get one from the _ies list; if there is no instance, it will create a new one and add it to the extractor list.
""" ie = self._ies_instances.get(ie_key) if ie is None: ie = get_info_extractor(ie_key)() self.add_info_extractor(ie) return ie def add_default_info_extractors(self): """ Add the InfoExtractors returned by gen_extractors to the end of the list """ all_ies = {ie.IE_NAME.lower(): ie for ie in gen_extractor_classes()} all_ies['end'] = UnsupportedURLIE() try: ie_names = orderedSet_from_options( self.params.get('allowed_extractors', ['default']), { 'all': list(all_ies), 'default': [name for name, ie in all_ies.items() if ie._ENABLED], }, use_regex=True) except re.error as e: raise ValueError(f'Wrong regex for allowed_extractors: {e.pattern}') for name in ie_names: self.add_info_extractor(all_ies[name]) self.write_debug(f'Loaded {len(ie_names)} extractors') def add_post_processor(self, pp, when='post_process'): """Add a PostProcessor object to the end of the chain.""" assert when in POSTPROCESS_WHEN, f'Invalid when={when}' self._pps[when].append(pp) pp.set_downloader(self) def add_post_hook(self, ph): """Add the post hook""" self._post_hooks.append(ph) def add_progress_hook(self, ph): """Add the download progress hook""" self._progress_hooks.append(ph) def add_postprocessor_hook(self, ph): """Add the postprocessing progress hook""" self._postprocessor_hooks.append(ph) for pps in self._pps.values(): for pp in pps: pp.add_progress_hook(ph) def _bidi_workaround(self, message): if not hasattr(self, '_output_channel'): return message assert hasattr(self, '_output_process') assert isinstance(message, str) line_count = message.count('\n') + 1 self._output_process.stdin.write((message + '\n').encode()) self._output_process.stdin.flush() res = ''.join(self._output_channel.readline().decode() for _ in range(line_count)) return res[:-len('\n')] def _write_string(self, message, out=None, only_once=False): if only_once: if message in self._printed_messages: return self._printed_messages.add(message) write_string(message, out=out, encoding=self.params.get('encoding')) def to_stdout(self, message, skip_eol=False, quiet=None): """Print message to stdout""" if quiet is not None: self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument quiet. ' 'Use "YoutubeDL.to_screen" instead') if skip_eol is not False: self.deprecation_warning('"YoutubeDL.to_stdout" no longer accepts the argument skip_eol. 
' 'Use "YoutubeDL.to_screen" instead') self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.out) def to_screen(self, message, skip_eol=False, quiet=None, only_once=False): """Print message to screen if not in quiet mode""" if self.params.get('logger'): self.params['logger'].debug(message) return if (self.params.get('quiet') if quiet is None else quiet) and not self.params.get('verbose'): return self._write_string( '{}{}'.format(self._bidi_workaround(message), ('' if skip_eol else '\n')), self._out_files.screen, only_once=only_once) def to_stderr(self, message, only_once=False): """Print message to stderr""" assert isinstance(message, str) if self.params.get('logger'): self.params['logger'].error(message) else: self._write_string(f'{self._bidi_workaround(message)}\n', self._out_files.error, only_once=only_once) def _send_console_code(self, code): if compat_os_name == 'nt' or not self._out_files.console: return self._write_string(code, self._out_files.console) def to_console_title(self, message): if not self.params.get('consoletitle', False): return message = remove_terminal_sequences(message) if compat_os_name == 'nt': if ctypes.windll.kernel32.GetConsoleWindow(): # c_wchar_p() might not be necessary if `message` is # already of type unicode() ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message)) else: self._send_console_code(f'\033]0;{message}\007') def save_console_title(self): if not self.params.get('consoletitle') or self.params.get('simulate'): return self._send_console_code('\033[22;0t') # Save the title on stack def restore_console_title(self): if not self.params.get('consoletitle') or self.params.get('simulate'): return self._send_console_code('\033[23;0t') # Restore the title from stack def __enter__(self): self.save_console_title() return self def save_cookies(self): if self.params.get('cookiefile') is not None: self.cookiejar.save() def __exit__(self, *args): self.restore_console_title() self.close() def close(self): self.save_cookies() if '_request_director' in self.__dict__: self._request_director.close() del self._request_director def trouble(self, message=None, tb=None, is_error=True): """Determine action to take when a download problem appears. Depending on if the downloader has been configured to ignore download errors or not, this method may throw an exception or not when errors are found, after printing the message. 
if message is not None: self.to_stderr(message) if self.params.get('verbose'): if tb is None: if sys.exc_info()[0]: # if .trouble has been called from an except block tb = '' if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: tb += ''.join(traceback.format_exception(*sys.exc_info()[1].exc_info)) tb += encode_compat_str(traceback.format_exc()) else: tb_data = traceback.format_list(traceback.extract_stack()) tb = ''.join(tb_data) if tb: self.to_stderr(tb) if not is_error: return if not self.params.get('ignoreerrors'): if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]: exc_info = sys.exc_info()[1].exc_info else: exc_info = sys.exc_info() raise DownloadError(message, exc_info) self._download_retcode = 1 Styles = Namespace( HEADERS='yellow', EMPHASIS='light blue', FILENAME='green', ID='green', DELIM='blue', ERROR='red', BAD_FORMAT='light red', WARNING='yellow', SUPPRESS='light black', ) def _format_text(self, handle, allow_colors, text, f, fallback=None, *, test_encoding=False): text = str(text) if test_encoding: original_text = text # handle.encoding can be None. See https://github.com/yt-dlp/yt-dlp/issues/2711 encoding = self.params.get('encoding') or getattr(handle, 'encoding', None) or 'ascii' text = text.encode(encoding, 'ignore').decode(encoding) if fallback is not None and text != original_text: text = fallback return format_text(text, f) if allow_colors is True else text if fallback is None else fallback def _format_out(self, *args, **kwargs): return self._format_text(self._out_files.out, self._allow_colors.out, *args, **kwargs) def _format_screen(self, *args, **kwargs): return self._format_text(self._out_files.screen, self._allow_colors.screen, *args, **kwargs) def _format_err(self, *args, **kwargs): return self._format_text(self._out_files.error, self._allow_colors.error, *args, **kwargs) def report_warning(self, message, only_once=False): """ Print the message to stderr; it will be prefixed with 'WARNING:'. If stderr is a tty file, the 'WARNING:' will be colored """ if self.params.get('logger') is not None: self.params['logger'].warning(message) else: if self.params.get('no_warnings'): return self.to_stderr(f'{self._format_err("WARNING:", self.Styles.WARNING)} {message}', only_once) def deprecation_warning(self, message, *, stacklevel=0): deprecation_warning( message, stacklevel=stacklevel + 1, printer=self.report_error, is_error=False) def deprecated_feature(self, message): if self.params.get('logger') is not None: self.params['logger'].warning(f'Deprecated Feature: {message}') self.to_stderr(f'{self._format_err("Deprecated Feature:", self.Styles.ERROR)} {message}', True) def report_error(self, message, *args, **kwargs): """ Do the same as trouble, but prefixes the message with 'ERROR:', colored in red if stderr is a tty file. """ self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs)
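    # A minimal sketch of plugging in a custom logger (illustrative; the "logger"
    # option documented above expects a logging.Logger-style object with
    # debug/warning/error methods):
    #
    #     import logging
    #     logging.basicConfig(level=logging.DEBUG)
    #     ydl = YoutubeDL({'logger': logging.getLogger('yt-dlp')})
    #
    # With a logger set, report_error/report_warning/write_debug route their
    # messages through it instead of writing directly to stderr.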
""" self.trouble(f'{self._format_err("ERROR:", self.Styles.ERROR)} {message}', *args, **kwargs) def write_debug(self, message, only_once=False): """Log debug message or Print message to stderr""" if not self.params.get('verbose', False): return message = f'[debug] {message}' if self.params.get('logger'): self.params['logger'].debug(message) else: self.to_stderr(message, only_once) def report_file_already_downloaded(self, file_name): """Report file has already been fully downloaded.""" try: self.to_screen(f'[download] {file_name} has already been downloaded') except UnicodeEncodeError: self.to_screen('[download] The file has already been downloaded') def report_file_delete(self, file_name): """Report that existing file will be deleted.""" try: self.to_screen(f'Deleting existing file {file_name}') except UnicodeEncodeError: self.to_screen('Deleting existing file') def raise_no_formats(self, info, forced=False, *, msg=None): has_drm = info.get('_has_drm') ignored, expected = self.params.get('ignore_no_formats_error'), bool(msg) msg = msg or has_drm and 'This video is DRM protected' or 'No video formats found!' if forced or not ignored: raise ExtractorError(msg, video_id=info['id'], ie=info['extractor'], expected=has_drm or ignored or expected) else: self.report_warning(msg) def parse_outtmpl(self): self.deprecation_warning('"YoutubeDL.parse_outtmpl" is deprecated and may be removed in a future version') self._parse_outtmpl() return self.params['outtmpl'] def _parse_outtmpl(self): sanitize = IDENTITY if self.params.get('restrictfilenames'): # Remove spaces in the default template sanitize = lambda x: x.replace(' - ', ' ').replace(' ', '-') outtmpl = self.params.setdefault('outtmpl', {}) if not isinstance(outtmpl, dict): self.params['outtmpl'] = outtmpl = {'default': outtmpl} outtmpl.update({k: sanitize(v) for k, v in DEFAULT_OUTTMPL.items() if outtmpl.get(k) is None}) def get_output_path(self, dir_type='', filename=None): paths = self.params.get('paths', {}) assert isinstance(paths, dict), '"paths" parameter must be a dictionary' path = os.path.join( expand_path(paths.get('home', '').strip()), expand_path(paths.get(dir_type, '').strip()) if dir_type else '', filename or '') return sanitize_path(path, force=self.params.get('windowsfilenames')) @staticmethod def _outtmpl_expandpath(outtmpl): # expand_path translates '%%' into '%' and '$$' into '$' # correspondingly that is not what we want since we need to keep # '%%' intact for template dict substitution step. Working around # with boundary-alike separator hack. sep = ''.join(random.choices(string.ascii_letters, k=32)) outtmpl = outtmpl.replace('%%', f'%{sep}%').replace('$$', f'${sep}$') # outtmpl should be expand_path'ed before template dict substitution # because meta fields may contain env variables we don't want to # be expanded. E.g. for outtmpl "%(title)s.%(ext)s" and # title "Hello $PATH", we don't want `$PATH` to be expanded. return expand_path(outtmpl).replace(sep, '') @staticmethod def escape_outtmpl(outtmpl): """ Escape any remaining strings like %s, %abc% etc. 
""" return re.sub( STR_FORMAT_RE_TMPL.format('', '(?![%(\0])'), lambda mobj: ('' if mobj.group('has_key') else '%') + mobj.group(0), outtmpl) @classmethod def validate_outtmpl(cls, outtmpl): """ @return None or Exception object """ outtmpl = re.sub( STR_FORMAT_RE_TMPL.format('[^)]*', '[ljhqBUDS]'), lambda mobj: f'{mobj.group(0)[:-1]}s', cls._outtmpl_expandpath(outtmpl)) try: cls.escape_outtmpl(outtmpl) % collections.defaultdict(int) return None except ValueError as err: return err @staticmethod def _copy_infodict(info_dict): info_dict = dict(info_dict) info_dict.pop('__postprocessors', None) info_dict.pop('__pending_error', None) return info_dict def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False): """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict @param sanitize Whether to sanitize the output as a filename. For backward compatibility, a function can also be passed """ info_dict.setdefault('epoch', int(time.time())) # keep epoch consistent once set info_dict = self._copy_infodict(info_dict) info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs formatSeconds(info_dict['duration'], '-' if sanitize else ':') if info_dict.get('duration', None) is not None else None) info_dict['autonumber'] = int(self.params.get('autonumber_start', 1) - 1 + self._num_downloads) info_dict['video_autonumber'] = self._num_videos if info_dict.get('resolution') is None: info_dict['resolution'] = self.format_resolution(info_dict, default=None) # For fields playlist_index, playlist_autonumber and autonumber convert all occurrences # of %(field)s to %(field)0Nd for backward compatibility field_size_compat_map = { 'playlist_index': number_of_digits(info_dict.get('__last_playlist_index') or 0), 'playlist_autonumber': number_of_digits(info_dict.get('n_entries') or 0), 'autonumber': self.params.get('autonumber_size') or 5, } TMPL_DICT = {} EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljhqBUDS]')) MATH_FUNCTIONS = { '+': float.__add__, '-': float.__sub__, '*': float.__mul__, } # Field is of the form key1.key2... # where keys (except first) can be string, int, slice or "{field, ...}" FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'} # noqa: UP031 FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % { # noqa: UP031 'inner': FIELD_INNER_RE, 'field': rf'\w*(?:\.{FIELD_INNER_RE})*', } MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})' MATH_OPERATORS_RE = r'(?:{})'.format('|'.join(map(re.escape, MATH_FUNCTIONS.keys()))) INTERNAL_FORMAT_RE = re.compile(rf'''(?xs) (?P-)? (?P{FIELD_RE}) (?P(?:{MATH_OPERATORS_RE}{MATH_FIELD_RE})*) (?:>(?P.+?))? (?P (?P(?.*?))? (?:\|(?P.*?))? )$''') def _from_user_input(field): if field == ':': return ... 
elif ':' in field: return slice(*map(int_or_none, field.split(':'))) elif int_or_none(field) is not None: return int(field) return field def _traverse_infodict(fields): fields = [f for x in re.split(r'\.({.+?})\.?', fields) for f in ([x] if x.startswith('{') else x.split('.'))] for i in (0, -1): if fields and not fields[i]: fields.pop(i) for i, f in enumerate(fields): if not f.startswith('{'): fields[i] = _from_user_input(f) continue assert f.endswith('}'), f'No closing brace for {f} in {fields}' fields[i] = {k: list(map(_from_user_input, k.split('.'))) for k in f[1:-1].split(',')} return traverse_obj(info_dict, fields, traverse_string=True) def get_value(mdict): # Object traversal value = _traverse_infodict(mdict['fields']) # Negative if mdict['negate']: value = float_or_none(value) if value is not None: value *= -1 # Do maths offset_key = mdict['maths'] if offset_key: value = float_or_none(value) operator = None while offset_key: item = re.match( MATH_FIELD_RE if operator else MATH_OPERATORS_RE, offset_key).group(0) offset_key = offset_key[len(item):] if operator is None: operator = MATH_FUNCTIONS[item] continue item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1) offset = float_or_none(item) if offset is None: offset = float_or_none(_traverse_infodict(item)) try: value = operator(value, multiplier * offset) except (TypeError, ZeroDivisionError): return None operator = None # Datetime formatting if mdict['strf_format']: value = strftime_or_none(value, mdict['strf_format'].replace('\\,', ',')) # XXX: Workaround for https://github.com/yt-dlp/yt-dlp/issues/4485 if sanitize and value == '': value = None return value na = self.params.get('outtmpl_na_placeholder', 'NA') def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')): return sanitize_filename(str(value), restricted=restricted, is_id=( bool(re.search(r'(^|[_.])id(\.|$)', key)) if 'filename-sanitization' in self.params['compat_opts'] else NO_DEFAULT)) sanitizer = sanitize if callable(sanitize) else filename_sanitizer sanitize = bool(sanitize) def _dumpjson_default(obj): if isinstance(obj, (set, LazyList)): return list(obj) return repr(obj) class _ReplacementFormatter(string.Formatter): def get_field(self, field_name, args, kwargs): if field_name.isdigit(): return args[0], -1 raise ValueError('Unsupported field') replacement_formatter = _ReplacementFormatter() def create_key(outer_mobj): if not outer_mobj.group('has_key'): return outer_mobj.group(0) key = outer_mobj.group('key') mobj = re.match(INTERNAL_FORMAT_RE, key) value, replacement, default, last_field = None, None, na, '' while mobj: mobj = mobj.groupdict() default = mobj['default'] if mobj['default'] is not None else default value = get_value(mobj) last_field, replacement = mobj['fields'], mobj['replacement'] if value is None and mobj['alternate']: mobj = re.match(INTERNAL_FORMAT_RE, mobj['remaining'][1:]) else: break if None not in (value, replacement): try: value = replacement_formatter.format(replacement, value) except ValueError: value, default = None, na fmt = outer_mobj.group('format') if fmt == 's' and last_field in field_size_compat_map and isinstance(value, int): fmt = f'0{field_size_compat_map[last_field]:d}d' flags = outer_mobj.group('conversion') or '' str_fmt = f'{fmt[:-1]}s' if value is None: value, fmt = default, 's' elif fmt[-1] == 'l': # list delim = '\n' if '#' in flags else ', ' value, fmt = delim.join(map(str, variadic(value, allowed_types=(str, bytes)))), str_fmt elif fmt[-1] == 'j': # json value, fmt = json.dumps( 
value, default=_dumpjson_default, indent=4 if '#' in flags else None, ensure_ascii='+' not in flags), str_fmt elif fmt[-1] == 'h': # html value, fmt = escapeHTML(str(value)), str_fmt elif fmt[-1] == 'q': # quoted value = map(str, variadic(value) if '#' in flags else [value]) value, fmt = shell_quote(value, shell=True), str_fmt elif fmt[-1] == 'B': # bytes value = f'%{str_fmt}'.encode() % str(value).encode() value, fmt = value.decode('utf-8', 'ignore'), 's' elif fmt[-1] == 'U': # unicode normalized value, fmt = unicodedata.normalize( # "+" = compatibility equivalence, "#" = NFD 'NF{}{}'.format('K' if '+' in flags else '', 'D' if '#' in flags else 'C'), value), str_fmt elif fmt[-1] == 'D': # decimal suffix num_fmt, fmt = fmt[:-1].replace('#', ''), 's' value = format_decimal_suffix(value, f'%{num_fmt}f%s' if num_fmt else '%d%s', factor=1024 if '#' in flags else 1000) elif fmt[-1] == 'S': # filename sanitization value, fmt = filename_sanitizer(last_field, value, restricted='#' in flags), str_fmt elif fmt[-1] == 'c': if value: value = str(value)[0] else: fmt = str_fmt elif fmt[-1] not in 'rsa': # numeric value = float_or_none(value) if value is None: value, fmt = default, 's' if sanitize: # If value is an object, sanitize might convert it to a string # So we convert it to repr first if fmt[-1] == 'r': value, fmt = repr(value), str_fmt elif fmt[-1] == 'a': value, fmt = ascii(value), str_fmt if fmt[-1] in 'csra': value = sanitizer(last_field, value) key = '{}\0{}'.format(key.replace('%', '%\0'), outer_mobj.group('format')) TMPL_DICT[key] = value return '{prefix}%({key}){fmt}'.format(key=key, fmt=fmt, prefix=outer_mobj.group('prefix')) return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs): outtmpl, info_dict = self.prepare_outtmpl(outtmpl, info_dict, *args, **kwargs) return self.escape_outtmpl(outtmpl) % info_dict @_catch_unsafe_extension_error def _prepare_filename(self, info_dict, *, outtmpl=None, tmpl_type=None): assert None in (outtmpl, tmpl_type), 'outtmpl and tmpl_type are mutually exclusive' if outtmpl is None: outtmpl = self.params['outtmpl'].get(tmpl_type or 'default', self.params['outtmpl']['default']) try: outtmpl = self._outtmpl_expandpath(outtmpl) filename = self.evaluate_outtmpl(outtmpl, info_dict, True) if not filename: return None if tmpl_type in ('', 'temp'): final_ext, ext = self.params.get('final_ext'), info_dict.get('ext') if final_ext and ext and final_ext != ext and filename.endswith(f'.{final_ext}'): filename = replace_extension(filename, ext, final_ext) elif tmpl_type: force_ext = OUTTMPL_TYPES[tmpl_type] if force_ext: filename = replace_extension(filename, force_ext, info_dict.get('ext')) # https://github.com/blackjack4494/youtube-dlc/issues/85 trim_file_name = self.params.get('trim_file_name', False) if trim_file_name: no_ext, *ext = filename.rsplit('.', 2) filename = join_nonempty(no_ext[:trim_file_name], *ext, delim='.') return filename except ValueError as err: self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')') return None def prepare_filename(self, info_dict, dir_type='', *, outtmpl=None, warn=False): """Generate the output filename""" if outtmpl: assert not dir_type, 'outtmpl and dir_type are mutually exclusive' dir_type = None filename = self._prepare_filename(info_dict, tmpl_type=dir_type, outtmpl=outtmpl) if not filename and dir_type not in ('', 'temp'): return '' if warn: if not self.params.get('paths'): pass elif filename == '-': 
self.report_warning('--paths is ignored when outputting to stdout', only_once=True) elif os.path.isabs(filename): self.report_warning('--paths is ignored since an absolute path is given in output template', only_once=True) if filename == '-' or not filename: return filename return self.get_output_path(dir_type, filename) def _match_entry(self, info_dict, incomplete=False, silent=False): """Returns None if the file should be downloaded""" _type = 'video' if 'playlist-match-filter' in self.params['compat_opts'] else info_dict.get('_type', 'video') assert incomplete or _type == 'video', 'Only video result can be considered complete' video_title = info_dict.get('title', info_dict.get('id', 'entry')) def check_filter(): if _type in ('playlist', 'multi_video'): return elif _type in ('url', 'url_transparent') and not try_call( lambda: self.get_info_extractor(info_dict['ie_key']).is_single_video(info_dict['url'])): return if 'title' in info_dict: # This can happen when we're just evaluating the playlist title = info_dict['title'] matchtitle = self.params.get('matchtitle', False) if matchtitle: if not re.search(matchtitle, title, re.IGNORECASE): return '"' + title + '" title did not match pattern "' + matchtitle + '"' rejecttitle = self.params.get('rejecttitle', False) if rejecttitle: if re.search(rejecttitle, title, re.IGNORECASE): return '"' + title + '" title matched reject pattern "' + rejecttitle + '"' date = info_dict.get('upload_date') if date is not None: date_range = self.params.get('daterange', DateRange()) if date not in date_range: return f'{date_from_str(date).isoformat()} upload date is not in range {date_range}' view_count = info_dict.get('view_count') if view_count is not None: min_views = self.params.get('min_views') if min_views is not None and view_count < min_views: return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views) max_views = self.params.get('max_views') if max_views is not None and view_count > max_views: return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views) if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')): return f'Skipping "{video_title}" because it is age restricted' match_filter = self.params.get('match_filter') if match_filter is None: return None cancelled = None try: try: ret = match_filter(info_dict, incomplete=incomplete) except TypeError: # For backward compatibility ret = None if incomplete else match_filter(info_dict) except DownloadCancelled as err: if err.msg is not NO_DEFAULT: raise ret, cancelled = err.msg, err if ret is NO_DEFAULT: while True: filename = self._format_screen(self.prepare_filename(info_dict), self.Styles.FILENAME) reply = input(self._format_screen( f'Download "{filename}"?
(Y/n): ', self.Styles.EMPHASIS)).lower().strip() if reply in {'y', ''}: return None elif reply == 'n': if cancelled: raise type(cancelled)(f'Skipping {video_title}') return f'Skipping {video_title}' return ret if self.in_download_archive(info_dict): reason = ''.join(( format_field(info_dict, 'id', f'{self._format_screen("%s", self.Styles.ID)}: '), format_field(info_dict, 'title', f'{self._format_screen("%s", self.Styles.EMPHASIS)} '), 'has already been recorded in the archive')) break_opt, break_err = 'break_on_existing', ExistingVideoReached else: try: reason = check_filter() except DownloadCancelled as e: reason, break_opt, break_err = e.msg, 'match_filter', type(e) else: break_opt, break_err = 'break_on_reject', RejectedVideoReached if reason is not None: if not silent: self.to_screen('[download] ' + reason) if self.params.get(break_opt, False): raise break_err() return reason @staticmethod def add_extra_info(info_dict, extra_info): """Set the keys from extra_info in info dict if they are missing""" for key, value in extra_info.items(): info_dict.setdefault(key, value) def extract_info(self, url, download=True, ie_key=None, extra_info=None, process=True, force_generic_extractor=False): """ Extract and return the information dictionary of the URL Arguments: @param url URL to extract Keyword arguments: @param download Whether to download videos @param process Whether to resolve all unresolved references (URLs, playlist items). Must be True for download to work @param ie_key Use only the extractor with this key @param extra_info Dictionary containing the extra values to add to the info (For internal use only) @force_generic_extractor Force using the generic extractor (Deprecated; use ie_key='Generic') """ if extra_info is None: extra_info = {} if not ie_key and force_generic_extractor: ie_key = 'Generic' if ie_key: ies = {ie_key: self._ies[ie_key]} if ie_key in self._ies else {} else: ies = self._ies for key, ie in ies.items(): if not ie.suitable(url): continue if not ie.working(): self.report_warning('The program functionality for this site has been marked as broken, ' 'and will probably not work.') temp_id = ie.get_temp_id(url) if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': key}): self.to_screen(f'[download] {self._format_screen(temp_id, self.Styles.ID)}: ' 'has already been recorded in the archive') if self.params.get('break_on_existing', False): raise ExistingVideoReached break return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process) else: extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default']) self.report_error(f'No suitable extractor{format_field(ie_key, None, " (%s)")} found for URL {url}', tb=False if extractors_restricted else None) def _handle_extraction_exceptions(func): @functools.wraps(func) def wrapper(self, *args, **kwargs): while True: try: return func(self, *args, **kwargs) except (DownloadCancelled, LazyList.IndexError, PagedList.IndexError): raise except ReExtractInfo as e: if e.expected: self.to_screen(f'{e}; Re-extracting data') else: self.to_stderr('\r') self.report_warning(f'{e}; Re-extracting data') continue except GeoRestrictedError as e: msg = e.msg if e.countries: msg += '\nThis video is available in {}.'.format(', '.join( map(ISO3166Utils.short2full, e.countries))) msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to work around this.'
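# NOTE: Illustrative sketch, not part of the original source. Since this wrapper
# converts GeoRestrictedError into a report_error() call (which raises
# DownloadError unless 'ignoreerrors' is set), code embedding this API would
# typically catch DownloadError; the URL below is a placeholder assumption:
#
#   from yt_dlp import YoutubeDL
#   from yt_dlp.utils import DownloadError
#
#   with YoutubeDL() as ydl:
#       try:
#           info = ydl.extract_info('https://example.com/geo-blocked', download=False)
#       except DownloadError as err:
#           print(err)  # the message may include the available countries and the proxy hint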
self.report_error(msg) except ExtractorError as e: # An error we somewhat expected self.report_error(str(e), e.format_traceback()) except Exception as e: if self.params.get('ignoreerrors'): self.report_error(str(e), tb=encode_compat_str(traceback.format_exc())) else: raise break return wrapper def _wait_for_video(self, ie_result={}): if (not self.params.get('wait_for_video') or ie_result.get('_type', 'video') != 'video' or ie_result.get('formats') or ie_result.get('url')): return format_dur = lambda dur: '%02d:%02d:%02d' % timetuple_from_msec(dur * 1000)[:-1] last_msg = '' def progress(msg): nonlocal last_msg full_msg = f'{msg}\n' if not self.params.get('noprogress'): full_msg = msg + ' ' * (len(last_msg) - len(msg)) + '\r' elif last_msg: return self.to_screen(full_msg, skip_eol=True) last_msg = msg min_wait, max_wait = self.params.get('wait_for_video') diff = try_get(ie_result, lambda x: x['release_timestamp'] - time.time()) if diff is None and ie_result.get('live_status') == 'is_upcoming': diff = round(random.uniform(min_wait, max_wait) if (max_wait and min_wait) else (max_wait or min_wait), 0) self.report_warning('Release time of video is not known') elif ie_result and (diff or 0) <= 0: self.report_warning('Video should already be available according to extracted info') diff = min(max(diff or 0, min_wait or 0), max_wait or float('inf')) self.to_screen(f'[wait] Waiting for {format_dur(diff)} - Press Ctrl+C to try now') wait_till = time.time() + diff try: while True: diff = wait_till - time.time() if diff <= 0: progress('') raise ReExtractInfo('[wait] Wait period ended', expected=True) progress(f'[wait] Remaining time until next attempt: {self._format_screen(format_dur(diff), self.Styles.EMPHASIS)}') time.sleep(1) except KeyboardInterrupt: progress('') raise ReExtractInfo('[wait] Interrupted by user', expected=True) except BaseException as e: if not isinstance(e, ReExtractInfo): self.to_screen('') raise def _load_cookies(self, data, *, autoscope=True): """Loads cookies from a `Cookie` header This tries to work around the security vulnerability of passing cookies to every domain. See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj @param data The Cookie header as string to load the cookies from @param autoscope If `False`, scope cookies using Set-Cookie syntax and error for cookie without domains If `True`, save cookies for later to be stored in the jar with a limited scope If a URL, save cookies in the jar with the domain of the URL """ for cookie in LenientSimpleCookie(data).values(): if autoscope and any(cookie.values()): raise ValueError('Invalid syntax in Cookie Header') domain = cookie.get('domain') or '' expiry = cookie.get('expires') if expiry == '': # 0 is valid expiry = None prepared_cookie = http.cookiejar.Cookie( cookie.get('version') or 0, cookie.key, cookie.value, None, False, domain, True, True, cookie.get('path') or '', bool(cookie.get('path')), cookie.get('secure') or False, expiry, False, None, None, {}) if domain: self.cookiejar.set_cookie(prepared_cookie) elif autoscope is True: self.deprecated_feature( 'Passing cookies as a header is a potential security risk; ' 'they will be scoped to the domain of the downloaded urls. ' 'Please consider loading cookies from a file or browser instead.') self.__header_cookies.append(prepared_cookie) elif autoscope: self.report_warning( 'The extractor result contains an unscoped cookie as an HTTP header. 
' f'If you are using yt-dlp with an input URL{bug_reports_message(before=",")}', only_once=True) self._apply_header_cookies(autoscope, [prepared_cookie]) else: self.report_error('Unscoped cookies are not allowed; please specify some sort of scoping', tb=False, is_error=False) def _apply_header_cookies(self, url, cookies=None): """Applies stray header cookies to the provided url This loads header cookies and scopes them to the domain provided in `url`. While this is not ideal, it helps reduce the risk of them being sent to an unintended destination while mostly maintaining compatibility. """ parsed = urllib.parse.urlparse(url) if not parsed.hostname: return for cookie in map(copy.copy, cookies or self.__header_cookies): cookie.domain = f'.{parsed.hostname}' self.cookiejar.set_cookie(cookie) @_handle_extraction_exceptions def __extract_info(self, url, ie, download, extra_info, process): self._apply_header_cookies(url) try: ie_result = ie.extract(url) except UserNotLive as e: if process: if self.params.get('wait_for_video'): self.report_warning(e) self._wait_for_video() raise if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here) self.report_warning(f'Extractor {ie.IE_NAME} returned nothing{bug_reports_message()}') return if isinstance(ie_result, list): # Backwards compatibility: old IE result format ie_result = { '_type': 'compat_list', 'entries': ie_result, } if extra_info.get('original_url'): ie_result.setdefault('original_url', extra_info['original_url']) self.add_default_extra_info(ie_result, ie, url) if process: self._wait_for_video(ie_result) return self.process_ie_result(ie_result, download, extra_info) else: return ie_result def add_default_extra_info(self, ie_result, ie, url): if url is not None: self.add_extra_info(ie_result, { 'webpage_url': url, 'original_url': url, }) webpage_url = ie_result.get('webpage_url') if webpage_url: self.add_extra_info(ie_result, { 'webpage_url_basename': url_basename(webpage_url), 'webpage_url_domain': get_domain(webpage_url), }) if ie is not None: self.add_extra_info(ie_result, { 'extractor': ie.IE_NAME, 'extractor_key': ie.ie_key(), }) def process_ie_result(self, ie_result, download=True, extra_info=None): """ Take the result of the ie(may be modified) and resolve all unresolved references (URLs, playlist items). It will also download the videos if 'download'. Returns the resolved ie_result. 
""" if extra_info is None: extra_info = {} result_type = ie_result.get('_type', 'video') if result_type in ('url', 'url_transparent'): ie_result['url'] = sanitize_url( ie_result['url'], scheme='http' if self.params.get('prefer_insecure') else 'https') if ie_result.get('original_url') and not extra_info.get('original_url'): extra_info = {'original_url': ie_result['original_url'], **extra_info} extract_flat = self.params.get('extract_flat', False) if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or extract_flat is True): info_copy = ie_result.copy() ie = try_get(ie_result.get('ie_key'), self.get_info_extractor) if ie and not ie_result.get('id'): info_copy['id'] = ie.get_temp_id(ie_result['url']) self.add_default_extra_info(info_copy, ie, ie_result['url']) self.add_extra_info(info_copy, extra_info) info_copy, _ = self.pre_process(info_copy) self._fill_common_fields(info_copy, False) self.__forced_printings(info_copy) self._raise_pending_errors(info_copy) if self.params.get('force_write_download_archive', False): self.record_download_archive(info_copy) return ie_result if result_type == 'video': self.add_extra_info(ie_result, extra_info) ie_result = self.process_video_result(ie_result, download=download) self._raise_pending_errors(ie_result) additional_urls = (ie_result or {}).get('additional_urls') if additional_urls: # TODO: Improve MetadataParserPP to allow setting a list if isinstance(additional_urls, str): additional_urls = [additional_urls] self.to_screen( '[info] {}: {} additional URL(s) requested'.format(ie_result['id'], len(additional_urls))) self.write_debug('Additional URLs: "{}"'.format('", "'.join(additional_urls))) ie_result['additional_entries'] = [ self.extract_info( url, download, extra_info=extra_info, force_generic_extractor=self.params.get('force_generic_extractor')) for url in additional_urls ] return ie_result elif result_type == 'url': # We have to add extra_info to the results because it may be # contained in a playlist return self.extract_info( ie_result['url'], download, ie_key=ie_result.get('ie_key'), extra_info=extra_info) elif result_type == 'url_transparent': # Use the information from the embedding page info = self.extract_info( ie_result['url'], ie_key=ie_result.get('ie_key'), extra_info=extra_info, download=False, process=False) # extract_info may return None when ignoreerrors is enabled and # extraction failed with an error, don't crash and return early # in this case if not info: return info exempted_fields = {'_type', 'url', 'ie_key'} if not ie_result.get('section_end') and ie_result.get('section_start') is None: # For video clips, the id etc of the clip extractor should be used exempted_fields |= {'id', 'extractor', 'extractor_key'} new_result = info.copy() new_result.update(filter_dict(ie_result, lambda k, v: v is not None and k not in exempted_fields)) # Extracted info may not be a video result (i.e. # info.get('_type', 'video') != video) but rather an url or # url_transparent. In such cases outer metadata (from ie_result) # should be propagated to inner one (info). For this to happen # _type of info should be overridden with url_transparent. This # fixes issue from https://github.com/ytdl-org/youtube-dl/pull/11163. 
if new_result.get('_type') == 'url': new_result['_type'] = 'url_transparent' return self.process_ie_result( new_result, download=download, extra_info=extra_info) elif result_type in ('playlist', 'multi_video'): # Protect from infinite recursion due to recursively nested playlists # (see https://github.com/ytdl-org/youtube-dl/issues/27833) webpage_url = ie_result.get('webpage_url') # Playlists may not have webpage_url if webpage_url and webpage_url in self._playlist_urls: self.to_screen( '[download] Skipping already downloaded playlist: {}'.format( ie_result.get('title') or ie_result.get('id'))) return self._playlist_level += 1 self._playlist_urls.add(webpage_url) self._fill_common_fields(ie_result, False) self._sanitize_thumbnails(ie_result) try: return self.__process_playlist(ie_result, download) finally: self._playlist_level -= 1 if not self._playlist_level: self._playlist_urls.clear() elif result_type == 'compat_list': self.report_warning( 'Extractor {} returned a compat_list result. ' 'It needs to be updated.'.format(ie_result.get('extractor'))) def _fixup(r): self.add_extra_info(r, { 'extractor': ie_result['extractor'], 'webpage_url': ie_result['webpage_url'], 'webpage_url_basename': url_basename(ie_result['webpage_url']), 'webpage_url_domain': get_domain(ie_result['webpage_url']), 'extractor_key': ie_result['extractor_key'], }) return r ie_result['entries'] = [ self.process_ie_result(_fixup(r), download, extra_info) for r in ie_result['entries'] ] return ie_result else: raise Exception(f'Invalid result type: {result_type}') def _ensure_dir_exists(self, path): return make_dir(path, self.report_error) @staticmethod def _playlist_infodict(ie_result, strict=False, **kwargs): info = { 'playlist_count': ie_result.get('playlist_count'), 'playlist': ie_result.get('title') or ie_result.get('id'), 'playlist_id': ie_result.get('id'), 'playlist_title': ie_result.get('title'), 'playlist_uploader': ie_result.get('uploader'), 'playlist_uploader_id': ie_result.get('uploader_id'), 'playlist_channel': ie_result.get('channel'), 'playlist_channel_id': ie_result.get('channel_id'), **kwargs, } if strict: return info if ie_result.get('webpage_url'): info.update({ 'webpage_url': ie_result['webpage_url'], 'webpage_url_basename': url_basename(ie_result['webpage_url']), 'webpage_url_domain': get_domain(ie_result['webpage_url']), }) return { **info, 'playlist_index': 0, '__last_playlist_index': max(ie_result.get('requested_entries') or (0, 0)), 'extractor': ie_result['extractor'], 'extractor_key': ie_result['extractor_key'], } def __process_playlist(self, ie_result, download): """Process each entry in the playlist""" assert ie_result['_type'] in ('playlist', 'multi_video') common_info = self._playlist_infodict(ie_result, strict=True) title = common_info.get('playlist') or '<Untitled>' if self._match_entry(common_info, incomplete=True) is not None: return self.to_screen(f'[download] Downloading {ie_result["_type"]}: {title}') all_entries = PlaylistEntries(self, ie_result) entries = orderedSet(all_entries.get_requested_items(), lazy=True) lazy = self.params.get('lazy_playlist') if lazy: resolved_entries, n_entries = [], 'N/A' ie_result['requested_entries'], ie_result['entries'] = None, None else: entries = resolved_entries = list(entries) n_entries = len(resolved_entries) ie_result['requested_entries'], ie_result['entries'] = tuple(zip(*resolved_entries)) or ([], []) if not ie_result.get('playlist_count'): # Better to do this after potentially exhausting entries ie_result['playlist_count'] = all_entries.get_full_count()
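# NOTE: Illustrative sketch, not part of the original source. In the non-lazy
# branch above, resolved_entries pairs each 1-based playlist index with its
# entry, and zip(*...) splits them back apart; e.g. with --playlist-items 1,3:
#
#   resolved_entries = [(1, {'id': 'a'}), (3, {'id': 'c'})]
#   requested, entries = tuple(zip(*resolved_entries)) or ([], [])
#   # requested == (1, 3); entries == ({'id': 'a'}, {'id': 'c'})
#   # (the `or ([], [])` covers an empty playlist, where zip() yields nothing)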
extra = self._playlist_infodict(ie_result, n_entries=int_or_none(n_entries)) ie_copy = collections.ChainMap(ie_result, extra) _infojson_written = False write_playlist_files = self.params.get('allow_playlist_files', True) if write_playlist_files and self.params.get('list_thumbnails'): self.list_thumbnails(ie_result) if write_playlist_files and not self.params.get('simulate'): _infojson_written = self._write_info_json( 'playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson')) if _infojson_written is None: return if self._write_description('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_description')) is None: return # TODO: This should be passed to ThumbnailsConvertor if necessary self._write_thumbnails('playlist', ie_result, self.prepare_filename(ie_copy, 'pl_thumbnail')) if lazy: if self.params.get('playlistreverse') or self.params.get('playlistrandom'): self.report_warning('playlistreverse and playlistrandom are not supported with lazy_playlist', only_once=True) elif self.params.get('playlistreverse'): entries.reverse() elif self.params.get('playlistrandom'): random.shuffle(entries) self.to_screen(f'[{ie_result["extractor"]}] Playlist {title}: Downloading {n_entries} items' f'{format_field(ie_result, "playlist_count", " of %s")}') keep_resolved_entries = self.params.get('extract_flat') != 'discard' if self.params.get('extract_flat') == 'discard_in_playlist': keep_resolved_entries = ie_result['_type'] != 'playlist' if keep_resolved_entries: self.write_debug('The information of all playlist entries will be held in memory') failures = 0 max_failures = self.params.get('skip_playlist_after_errors') or float('inf') for i, (playlist_index, entry) in enumerate(entries): if lazy: resolved_entries.append((playlist_index, entry)) if not entry: continue entry['__x_forwarded_for_ip'] = ie_result.get('__x_forwarded_for_ip') if not lazy and 'playlist-index' in self.params['compat_opts']: playlist_index = ie_result['requested_entries'][i] entry_copy = collections.ChainMap(entry, { **common_info, 'n_entries': int_or_none(n_entries), 'playlist_index': playlist_index, 'playlist_autonumber': i + 1, }) if self._match_entry(entry_copy, incomplete=True) is not None: # For compatibility with youtube-dl.
See https://github.com/yt-dlp/yt-dlp/issues/4369 resolved_entries[i] = (playlist_index, NO_DEFAULT) continue self.to_screen( f'[download] Downloading item {self._format_screen(i + 1, self.Styles.ID)} ' f'of {self._format_screen(n_entries, self.Styles.EMPHASIS)}') entry_result = self.__process_iterable_entry(entry, download, collections.ChainMap({ 'playlist_index': playlist_index, 'playlist_autonumber': i + 1, }, extra)) if not entry_result: failures += 1 if failures >= max_failures: self.report_error( f'Skipping the remaining entries in playlist "{title}" since {failures} items failed extraction') break if keep_resolved_entries: resolved_entries[i] = (playlist_index, entry_result) # Update with processed data ie_result['entries'] = [e for _, e in resolved_entries if e is not NO_DEFAULT] ie_result['requested_entries'] = [i for i, e in resolved_entries if e is not NO_DEFAULT] if ie_result['requested_entries'] == try_call(lambda: list(range(1, ie_result['playlist_count'] + 1))): # Do not set for full playlist ie_result.pop('requested_entries') # Write the updated info to json if _infojson_written is True and self._write_info_json( 'updated playlist', ie_result, self.prepare_filename(ie_copy, 'pl_infojson'), overwrite=True) is None: return ie_result = self.run_all_pps('playlist', ie_result) self.to_screen(f'[download] Finished downloading playlist: {title}') return ie_result @_handle_extraction_exceptions def __process_iterable_entry(self, entry, download, extra_info): return self.process_ie_result( entry, download=download, extra_info=extra_info) def _build_format_filter(self, filter_spec): " Returns a function to filter the formats according to the filter_spec " OPERATORS = { '<': operator.lt, '<=': operator.le, '>': operator.gt, '>=': operator.ge, '=': operator.eq, '!=': operator.ne, } operator_rex = re.compile(r'''(?x)\s* (?P<key>[\w.-]+)\s* (?P<op>{})(?P<none_inclusive>\s*\?)?\s* (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s* '''.format('|'.join(map(re.escape, OPERATORS.keys())))) m = operator_rex.fullmatch(filter_spec) if m: try: comparison_value = int(m.group('value')) except ValueError: comparison_value = parse_filesize(m.group('value')) if comparison_value is None: comparison_value = parse_filesize(m.group('value') + 'B') if comparison_value is None: raise ValueError( 'Invalid value {!r} in format specification {!r}'.format( m.group('value'), filter_spec)) op = OPERATORS[m.group('op')] if not m: STR_OPERATORS = { '=': operator.eq, '^=': lambda attr, value: attr.startswith(value), '$=': lambda attr, value: attr.endswith(value), '*=': lambda attr, value: value in attr, '~=': lambda attr, value: value.search(attr) is not None, } str_operator_rex = re.compile(r'''(?x)\s* (?P<key>[a-zA-Z0-9._-]+)\s* (?P<negation>!\s*)?(?P<op>{})\s*(?P<none_inclusive>\?\s*)? (?P<quote>["'])?
(?P<value>(?(quote)(?:(?!(?P=quote))[^\\]|\\.)+|[\w.-]+)) (?(quote)(?P=quote))\s* '''.format('|'.join(map(re.escape, STR_OPERATORS.keys())))) m = str_operator_rex.fullmatch(filter_spec) if m: if m.group('op') == '~=': comparison_value = re.compile(m.group('value')) else: comparison_value = re.sub(r'''\\([\\"'])''', r'\1', m.group('value')) str_op = STR_OPERATORS[m.group('op')] if m.group('negation'): op = lambda attr, value: not str_op(attr, value) else: op = str_op if not m: raise SyntaxError(f'Invalid filter specification {filter_spec!r}') def _filter(f): actual_value = f.get(m.group('key')) if actual_value is None: return m.group('none_inclusive') return op(actual_value, comparison_value) return _filter def _check_formats(self, formats): for f in formats: working = f.get('__working') if working is not None: if working: yield f continue self.to_screen('[info] Testing format {}'.format(f['format_id'])) path = self.get_output_path('temp') if not self._ensure_dir_exists(f'{path}/'): continue temp_file = tempfile.NamedTemporaryFile(suffix='.tmp', delete=False, dir=path or None) temp_file.close() try: success, _ = self.dl(temp_file.name, f, test=True) except (DownloadError, OSError, ValueError, *network_exceptions): success = False finally: if os.path.exists(temp_file.name): try: os.remove(temp_file.name) except OSError: self.report_warning(f'Unable to delete temporary file "{temp_file.name}"') f['__working'] = success if success: yield f else: self.to_screen('[info] Unable to download format {}. Skipping...'.format(f['format_id'])) def _select_formats(self, formats, selector): return list(selector({ 'formats': formats, 'has_merged_format': any('none' not in (f.get('acodec'), f.get('vcodec')) for f in formats), 'incomplete_formats': (all(f.get('vcodec') == 'none' for f in formats) # No formats with video or all(f.get('acodec') == 'none' for f in formats)), # OR, No formats with audio })) def _default_format_spec(self, info_dict): prefer_best = ( self.params['outtmpl']['default'] == '-' or info_dict.get('is_live') and not self.params.get('live_from_start')) def can_merge(): merger = FFmpegMergerPP(self) return merger.available and merger.can_merge() if not prefer_best and not can_merge(): prefer_best = True formats = self._get_formats(info_dict) evaluate_formats = lambda spec: self._select_formats(formats, self.build_format_selector(spec)) if evaluate_formats('b/bv+ba') != evaluate_formats('bv*+ba/b'): self.report_warning('ffmpeg not found. The downloaded format may not be the best available.
' 'Installing ffmpeg is strongly recommended: https://github.com/yt-dlp/yt-dlp#dependencies') compat = (self.params.get('allow_multiple_audio_streams') or 'format-spec' in self.params['compat_opts']) return ('best/bestvideo+bestaudio' if prefer_best else 'bestvideo+bestaudio/best' if compat else 'bestvideo*+bestaudio/best') def build_format_selector(self, format_spec): def syntax_error(note, start): message = ( 'Invalid format specification: ' '{}\n\t{}\n\t{}^'.format(note, format_spec, ' ' * start[1])) return SyntaxError(message) PICKFIRST = 'PICKFIRST' MERGE = 'MERGE' SINGLE = 'SINGLE' GROUP = 'GROUP' FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters']) allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False), 'video': self.params.get('allow_multiple_video_streams', False)} def _parse_filter(tokens): filter_parts = [] for type_, string_, _start, _, _ in tokens: if type_ == tokenize.OP and string_ == ']': return ''.join(filter_parts) else: filter_parts.append(string_) def _remove_unused_ops(tokens): # Remove operators that we don't use and join them with the surrounding strings. # E.g. 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9' ALLOWED_OPS = ('/', '+', ',', '(', ')') last_string, last_start, last_end, last_line = None, None, None, None for type_, string_, start, end, line in tokens: if type_ == tokenize.OP and string_ == '[': if last_string: yield tokenize.NAME, last_string, last_start, last_end, last_line last_string = None yield type_, string_, start, end, line # everything inside brackets will be handled by _parse_filter for type_, string_, start, end, line in tokens: yield type_, string_, start, end, line if type_ == tokenize.OP and string_ == ']': break elif type_ == tokenize.OP and string_ in ALLOWED_OPS: if last_string: yield tokenize.NAME, last_string, last_start, last_end, last_line last_string = None yield type_, string_, start, end, line elif type_ in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]: if not last_string: last_string = string_ last_start = start last_end = end else: last_string += string_ if last_string: yield tokenize.NAME, last_string, last_start, last_end, last_line def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False): selectors = [] current_selector = None for type_, string_, start, _, _ in tokens: # ENCODING is only defined in Python 3.x if type_ == getattr(tokenize, 'ENCODING', None): continue elif type_ in [tokenize.NAME, tokenize.NUMBER]: current_selector = FormatSelector(SINGLE, string_, []) elif type_ == tokenize.OP: if string_ == ')': if not inside_group: # ')' will be handled by the parentheses group tokens.restore_last_token() break elif inside_merge and string_ in ['/', ',']: tokens.restore_last_token() break elif inside_choice and string_ == ',': tokens.restore_last_token() break elif string_ == ',': if not current_selector: raise syntax_error('"," must follow a format selector', start) selectors.append(current_selector) current_selector = None elif string_ == '/': if not current_selector: raise syntax_error('"/" must follow a format selector', start) first_choice = current_selector second_choice = _parse_format_selection(tokens, inside_choice=True) current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), []) elif string_ == '[': if not current_selector: current_selector = FormatSelector(SINGLE, 'best', []) format_filter = _parse_filter(tokens) current_selector.filters.append(format_filter) elif 
string_ == '(': if current_selector: raise syntax_error('Unexpected "("', start) group = _parse_format_selection(tokens, inside_group=True) current_selector = FormatSelector(GROUP, group, []) elif string_ == '+': if not current_selector: raise syntax_error('Unexpected "+"', start) selector_1 = current_selector selector_2 = _parse_format_selection(tokens, inside_merge=True) if not selector_2: raise syntax_error('Expected a selector', start) current_selector = FormatSelector(MERGE, (selector_1, selector_2), []) else: raise syntax_error(f'Operator not recognized: "{string_}"', start) elif type_ == tokenize.ENDMARKER: break if current_selector: selectors.append(current_selector) return selectors def _merge(formats_pair): format_1, format_2 = formats_pair formats_info = [] formats_info.extend(format_1.get('requested_formats', (format_1,))) formats_info.extend(format_2.get('requested_formats', (format_2,))) if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']: get_no_more = {'video': False, 'audio': False} for (i, fmt_info) in enumerate(formats_info): if fmt_info.get('acodec') == fmt_info.get('vcodec') == 'none': formats_info.pop(i) continue for aud_vid in ['audio', 'video']: if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none': if get_no_more[aud_vid]: formats_info.pop(i) break get_no_more[aud_vid] = True if len(formats_info) == 1: return formats_info[0] video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none'] audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none'] the_only_video = video_fmts[0] if len(video_fmts) == 1 else None the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None output_ext = get_compatible_ext( vcodecs=[f.get('vcodec') for f in video_fmts], acodecs=[f.get('acodec') for f in audio_fmts], vexts=[f['ext'] for f in video_fmts], aexts=[f['ext'] for f in audio_fmts], preferences=(try_call(lambda: self.params['merge_output_format'].split('/')) or self.params.get('prefer_free_formats') and ('webm', 'mkv'))) filtered = lambda *keys: filter(None, (traverse_obj(fmt, *keys) for fmt in formats_info)) new_dict = { 'requested_formats': formats_info, 'format': '+'.join(filtered('format')), 'format_id': '+'.join(filtered('format_id')), 'ext': output_ext, 'protocol': '+'.join(map(determine_protocol, formats_info)), 'language': '+'.join(orderedSet(filtered('language'))) or None, 'format_note': '+'.join(orderedSet(filtered('format_note'))) or None, 'filesize_approx': sum(filtered('filesize', 'filesize_approx')) or None, 'tbr': sum(filtered('tbr', 'vbr', 'abr')), } if the_only_video: new_dict.update({ 'width': the_only_video.get('width'), 'height': the_only_video.get('height'), 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video), 'fps': the_only_video.get('fps'), 'dynamic_range': the_only_video.get('dynamic_range'), 'vcodec': the_only_video.get('vcodec'), 'vbr': the_only_video.get('vbr'), 'stretched_ratio': the_only_video.get('stretched_ratio'), 'aspect_ratio': the_only_video.get('aspect_ratio'), }) if the_only_audio: new_dict.update({ 'acodec': the_only_audio.get('acodec'), 'abr': the_only_audio.get('abr'), 'asr': the_only_audio.get('asr'), 'audio_channels': the_only_audio.get('audio_channels'), }) return new_dict def _check_formats(formats): if self.params.get('check_formats') == 'selected': yield from self._check_formats(formats) return elif (self.params.get('check_formats') is not None or 
self.params.get('allow_unplayable_formats')): yield from formats return for f in formats: if f.get('has_drm') or f.get('__needs_testing'): yield from self._check_formats([f]) else: yield f def _build_selector_function(selector): if isinstance(selector, list): # , fs = [_build_selector_function(s) for s in selector] def selector_function(ctx): for f in fs: yield from f(ctx) return selector_function elif selector.type == GROUP: # () selector_function = _build_selector_function(selector.selector) elif selector.type == PICKFIRST: # / fs = [_build_selector_function(s) for s in selector.selector] def selector_function(ctx): for f in fs: picked_formats = list(f(ctx)) if picked_formats: return picked_formats return [] elif selector.type == MERGE: # + selector_1, selector_2 = map(_build_selector_function, selector.selector) def selector_function(ctx): for pair in itertools.product(selector_1(ctx), selector_2(ctx)): yield _merge(pair) elif selector.type == SINGLE: # atom format_spec = selector.selector or 'best' # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector if format_spec == 'all': def selector_function(ctx): yield from _check_formats(ctx['formats'][::-1]) elif format_spec == 'mergeall': def selector_function(ctx): formats = list(_check_formats( f for f in ctx['formats'] if f.get('vcodec') != 'none' or f.get('acodec') != 'none')) if not formats: return merged_format = formats[-1] for f in formats[-2::-1]: merged_format = _merge((merged_format, f)) yield merged_format else: format_fallback, seperate_fallback, format_reverse, format_idx = False, None, True, 1 mobj = re.match( r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$', format_spec) if mobj is not None: format_idx = int_or_none(mobj.group('n'), default=1) format_reverse = mobj.group('bw')[0] == 'b' format_type = (mobj.group('type') or [None])[0] not_format_type = {'v': 'a', 'a': 'v'}.get(format_type) format_modified = mobj.group('mod') is not None format_fallback = not format_type and not format_modified # for b, w _filter_f = ( (lambda f: f.get(f'{format_type}codec') != 'none') if format_type and format_modified # bv*, ba*, wv*, wa* else (lambda f: f.get(f'{not_format_type}codec') == 'none') if format_type # bv, ba, wv, wa else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none') if not format_modified # b, w else lambda f: True) # b*, w* filter_f = lambda f: _filter_f(f) and ( f.get('vcodec') != 'none' or f.get('acodec') != 'none') else: if format_spec in self._format_selection_exts['audio']: filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' elif format_spec in self._format_selection_exts['video']: filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') != 'none' and f.get('vcodec') != 'none' seperate_fallback = lambda f: f.get('ext') == format_spec and f.get('vcodec') != 'none' elif format_spec in self._format_selection_exts['storyboards']: filter_f = lambda f: f.get('ext') == format_spec and f.get('acodec') == 'none' and f.get('vcodec') == 'none' else: filter_f = lambda f: f.get('format_id') == format_spec # id def selector_function(ctx): formats = list(ctx['formats']) matches = list(filter(filter_f, formats)) if filter_f is not None else formats if not matches: if format_fallback and ctx['incomplete_formats']: # for extractors with incomplete formats (audio only (soundcloud) # or video only (imgur)) best/worst will fall back to # best/worst {video,audio}-only format matches = list(filter(lambda f: f.get('vcodec') != 'none' or
f.get('acodec') != 'none', formats)) elif seperate_fallback and not ctx['has_merged_format']: # for compatibility with youtube-dl when there is no pre-merged format matches = list(filter(seperate_fallback, formats)) matches = LazyList(_check_formats(matches[::-1 if format_reverse else 1])) try: yield matches[format_idx - 1] except LazyList.IndexError: return filters = [self._build_format_filter(f) for f in selector.filters] def final_selector(ctx): ctx_copy = dict(ctx) for _filter in filters: ctx_copy['formats'] = list(filter(_filter, ctx_copy['formats'])) return selector_function(ctx_copy) return final_selector # HACK: Python 3.12 changed the underlying parser, rendering '7_a' invalid # Prefix numbers with random letters to avoid it being classified as a number # See: https://github.com/yt-dlp/yt-dlp/pulls/8797 # TODO: Implement parser not reliant on tokenize.tokenize prefix = ''.join(random.choices(string.ascii_letters, k=32)) stream = io.BytesIO(re.sub(r'\d[_\d]*', rf'{prefix}\g<0>', format_spec).encode()) try: tokens = list(_remove_unused_ops( token._replace(string=token.string.replace(prefix, '')) for token in tokenize.tokenize(stream.readline))) except tokenize.TokenError: raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec))) class TokenIterator: def __init__(self, tokens): self.tokens = tokens self.counter = 0 def __iter__(self): return self def __next__(self): if self.counter >= len(self.tokens): raise StopIteration value = self.tokens[self.counter] self.counter += 1 return value next = __next__ def restore_last_token(self): self.counter -= 1 parsed_selector = _parse_format_selection(iter(TokenIterator(tokens))) return _build_selector_function(parsed_selector) def _calc_headers(self, info_dict, load_cookies=False): res = HTTPHeaderDict(self.params['http_headers'], info_dict.get('http_headers')) clean_headers(res) if load_cookies: # For --load-info-json self._load_cookies(res.get('Cookie'), autoscope=info_dict['url']) # compat self._load_cookies(info_dict.get('cookies'), autoscope=False) # The `Cookie` header is removed to prevent leaks and unscoped cookies. 
# See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj res.pop('Cookie', None) cookies = self.cookiejar.get_cookies_for_url(info_dict['url']) if cookies: encoder = LenientSimpleCookie() values = [] for cookie in cookies: _, value = encoder.value_encode(cookie.value) values.append(f'{cookie.name}={value}') if cookie.domain: values.append(f'Domain={cookie.domain}') if cookie.path: values.append(f'Path={cookie.path}') if cookie.secure: values.append('Secure') if cookie.expires: values.append(f'Expires={cookie.expires}') if cookie.version: values.append(f'Version={cookie.version}') info_dict['cookies'] = '; '.join(values) if 'X-Forwarded-For' not in res: x_forwarded_for_ip = info_dict.get('__x_forwarded_for_ip') if x_forwarded_for_ip: res['X-Forwarded-For'] = x_forwarded_for_ip return res def _calc_cookies(self, url): self.deprecation_warning('"YoutubeDL._calc_cookies" is deprecated and may be removed in a future version') return self.cookiejar.get_cookie_header(url) def _sort_thumbnails(self, thumbnails): thumbnails.sort(key=lambda t: ( t.get('preference') if t.get('preference') is not None else -1, t.get('width') if t.get('width') is not None else -1, t.get('height') if t.get('height') is not None else -1, t.get('id') if t.get('id') is not None else '', t.get('url'))) def _sanitize_thumbnails(self, info_dict): thumbnails = info_dict.get('thumbnails') if thumbnails is None: thumbnail = info_dict.get('thumbnail') if thumbnail: info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}] if not thumbnails: return def check_thumbnails(thumbnails): for t in thumbnails: self.to_screen(f'[info] Testing thumbnail {t["id"]}') try: self.urlopen(HEADRequest(t['url'])) except network_exceptions as err: self.to_screen(f'[info] Unable to connect to thumbnail {t["id"]} URL {t["url"]!r} - {err}. Skipping...') continue yield t self._sort_thumbnails(thumbnails) for i, t in enumerate(thumbnails): if t.get('id') is None: t['id'] = str(i) if t.get('width') and t.get('height'): t['resolution'] = '%dx%d' % (t['width'], t['height']) t['url'] = sanitize_url(t['url']) if self.params.get('check_formats') is True: info_dict['thumbnails'] = LazyList(check_thumbnails(thumbnails[::-1]), reverse=True) else: info_dict['thumbnails'] = thumbnails def _fill_common_fields(self, info_dict, final=True): # TODO: move sanitization here if final: title = info_dict['fulltitle'] = info_dict.get('title') if not title: if title == '': self.write_debug('Extractor gave empty title. Creating a generic title') else: self.report_warning('Extractor failed to obtain "title". Creating a generic title instead') info_dict['title'] = f'{info_dict["extractor"].replace(":", "-")} video #{info_dict["id"]}' if info_dict.get('duration') is not None: info_dict['duration_string'] = formatSeconds(info_dict['duration']) for ts_key, date_key in ( ('timestamp', 'upload_date'), ('release_timestamp', 'release_date'), ('modified_timestamp', 'modified_date'), ): if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None: # Working around out-of-range timestamp values (e.g. 
negative ones on Windows, # see http://bugs.python.org/issue1646728) with contextlib.suppress(ValueError, OverflowError, OSError): upload_date = dt.datetime.fromtimestamp(info_dict[ts_key], dt.timezone.utc) info_dict[date_key] = upload_date.strftime('%Y%m%d') if not info_dict.get('release_year'): info_dict['release_year'] = traverse_obj(info_dict, ('release_date', {lambda x: int(x[:4])})) live_keys = ('is_live', 'was_live') live_status = info_dict.get('live_status') if live_status is None: for key in live_keys: if info_dict.get(key) is False: continue if info_dict.get(key): live_status = key break if all(info_dict.get(key) is False for key in live_keys): live_status = 'not_live' if live_status: info_dict['live_status'] = live_status for key in live_keys: if info_dict.get(key) is None: info_dict[key] = (live_status == key) if live_status == 'post_live': info_dict['was_live'] = True # Auto generate title fields corresponding to the *_number fields when missing # in order to always have clean titles. This is very common for TV series. for field in ('chapter', 'season', 'episode'): if final and info_dict.get(f'{field}_number') is not None and not info_dict.get(field): info_dict[field] = '%s %d' % (field.capitalize(), info_dict[f'{field}_number']) for old_key, new_key in self._deprecated_multivalue_fields.items(): if new_key in info_dict and old_key in info_dict: if '_version' not in info_dict: # HACK: Do not warn when using --load-info-json self.deprecation_warning(f'Do not return {old_key!r} when {new_key!r} is present') elif old_value := info_dict.get(old_key): info_dict[new_key] = old_value.split(', ') elif new_value := info_dict.get(new_key): info_dict[old_key] = ', '.join(v.replace(',', '\N{FULLWIDTH COMMA}') for v in new_value) def _raise_pending_errors(self, info): err = info.pop('__pending_error', None) if err: self.report_error(err, tb=False) def sort_formats(self, info_dict): formats = self._get_formats(info_dict) formats.sort(key=FormatSorter( self, info_dict.get('_format_sort_fields') or []).calculate_preference) def process_video_result(self, info_dict, download=True): assert info_dict.get('_type', 'video') == 'video' self._num_videos += 1 if 'id' not in info_dict: raise ExtractorError('Missing "id" field in extractor result', ie=info_dict['extractor']) elif not info_dict.get('id'): raise ExtractorError('Extractor failed to obtain "id"', ie=info_dict['extractor']) def report_force_conversion(field, field_not, conversion): self.report_warning( f'"{field}" field is not {field_not} - forcing {conversion} conversion, ' 'there is an error in extractor') def sanitize_string_field(info, string_field): field = info.get(string_field) if field is None or isinstance(field, str): return report_force_conversion(string_field, 'a string', 'string') info[string_field] = str(field) def sanitize_numeric_fields(info): for numeric_field in self._NUMERIC_FIELDS: field = info.get(numeric_field) if field is None or isinstance(field, (int, float)): continue report_force_conversion(numeric_field, 'numeric', 'int') info[numeric_field] = int_or_none(field) sanitize_string_field(info_dict, 'id') sanitize_numeric_fields(info_dict) if info_dict.get('section_end') and info_dict.get('section_start') is not None: info_dict['duration'] = round(info_dict['section_end'] - info_dict['section_start'], 3) if (info_dict.get('duration') or 0) <= 0 and info_dict.pop('duration', None): self.report_warning('"duration" field is negative, there is an error in extractor') chapters = info_dict.get('chapters') or [] if chapters 
and chapters[0].get('start_time'): chapters.insert(0, {'start_time': 0}) dummy_chapter = {'end_time': 0, 'start_time': info_dict.get('duration')} for idx, (prev, current, next_) in enumerate(zip( (dummy_chapter, *chapters), chapters, (*chapters[1:], dummy_chapter)), 1): if current.get('start_time') is None: current['start_time'] = prev.get('end_time') if not current.get('end_time'): current['end_time'] = next_.get('start_time') if not current.get('title'): current['title'] = f'<Untitled Chapter {idx}>' if 'playlist' not in info_dict: # It isn't part of a playlist info_dict['playlist'] = None info_dict['playlist_index'] = None self._sanitize_thumbnails(info_dict) thumbnail = info_dict.get('thumbnail') thumbnails = info_dict.get('thumbnails') if thumbnail: info_dict['thumbnail'] = sanitize_url(thumbnail) elif thumbnails: info_dict['thumbnail'] = thumbnails[-1]['url'] if info_dict.get('display_id') is None and 'id' in info_dict: info_dict['display_id'] = info_dict['id'] self._fill_common_fields(info_dict) for cc_kind in ('subtitles', 'automatic_captions'): cc = info_dict.get(cc_kind) if cc: for _, subtitle in cc.items(): for subtitle_format in subtitle: if subtitle_format.get('url'): subtitle_format['url'] = sanitize_url(subtitle_format['url']) if subtitle_format.get('ext') is None: subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower() automatic_captions = info_dict.get('automatic_captions') subtitles = info_dict.get('subtitles') info_dict['requested_subtitles'] = self.process_subtitles( info_dict['id'], subtitles, automatic_captions) formats = self._get_formats(info_dict) # Backward compatibility with InfoExtractor._sort_formats field_preference = (formats or [{}])[0].pop('__sort_fields', None) if field_preference: info_dict['_format_sort_fields'] = field_preference info_dict['_has_drm'] = any( # or None ensures --clean-infojson removes it f.get('has_drm') and f['has_drm'] != 'maybe' for f in formats) or None if not self.params.get('allow_unplayable_formats'): formats = [f for f in formats if not f.get('has_drm') or f['has_drm'] == 'maybe'] if formats and all(f.get('acodec') == f.get('vcodec') == 'none' for f in formats): self.report_warning( f'{"This video is DRM protected and " if info_dict["_has_drm"] else ""}' 'only images are available for download. Use --list-formats to see them'.capitalize()) get_from_start = not info_dict.get('is_live') or bool(self.params.get('live_from_start')) if not get_from_start: info_dict['title'] += ' ' + dt.datetime.now().strftime('%Y-%m-%d %H:%M') if info_dict.get('is_live') and formats: formats = [f for f in formats if bool(f.get('is_from_start')) == get_from_start] if get_from_start and not formats: self.raise_no_formats(info_dict, msg=( '--live-from-start is passed, but there are no formats that can be downloaded from the start. 
' 'If you want to download from the current time, use --no-live-from-start')) def is_wellformed(f): url = f.get('url') if not url: self.report_warning( '"url" field is missing or empty - skipping format, ' 'there is an error in extractor') return False if isinstance(url, bytes): sanitize_string_field(f, 'url') return True # Filter out malformed formats for better extraction robustness formats = list(filter(is_wellformed, formats or [])) if not formats: self.raise_no_formats(info_dict) for fmt in formats: sanitize_string_field(fmt, 'format_id') sanitize_numeric_fields(fmt) fmt['url'] = sanitize_url(fmt['url']) if fmt.get('ext') is None: fmt['ext'] = determine_ext(fmt['url']).lower() if fmt['ext'] in ('aac', 'opus', 'mp3', 'flac', 'vorbis'): if fmt.get('acodec') is None: fmt['acodec'] = fmt['ext'] if fmt.get('protocol') is None: fmt['protocol'] = determine_protocol(fmt) if fmt.get('resolution') is None: fmt['resolution'] = self.format_resolution(fmt, default=None) if fmt.get('dynamic_range') is None and fmt.get('vcodec') != 'none': fmt['dynamic_range'] = 'SDR' if fmt.get('aspect_ratio') is None: fmt['aspect_ratio'] = try_call(lambda: round(fmt['width'] / fmt['height'], 2)) # For fragmented formats, "tbr" is often max bitrate and not average if (('manifest-filesize-approx' in self.params['compat_opts'] or not fmt.get('manifest_url')) and not fmt.get('filesize') and not fmt.get('filesize_approx')): fmt['filesize_approx'] = filesize_from_tbr(fmt.get('tbr'), info_dict.get('duration')) fmt['http_headers'] = self._calc_headers(collections.ChainMap(fmt, info_dict), load_cookies=True) # Safeguard against old/insecure infojson when using --load-info-json if info_dict.get('http_headers'): info_dict['http_headers'] = HTTPHeaderDict(info_dict['http_headers']) info_dict['http_headers'].pop('Cookie', None) # This is copied to http_headers by the above _calc_headers and can now be removed if '__x_forwarded_for_ip' in info_dict: del info_dict['__x_forwarded_for_ip'] self.sort_formats({ 'formats': formats, '_format_sort_fields': info_dict.get('_format_sort_fields'), }) # Sanitize and group by format_id formats_dict = {} for i, fmt in enumerate(formats): if not fmt.get('format_id'): fmt['format_id'] = str(i) else: # Sanitize format_id from characters used in format selector expression fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id']) formats_dict.setdefault(fmt['format_id'], []).append(fmt) # Make sure all formats have unique format_id common_exts = set(itertools.chain(*self._format_selection_exts.values())) for format_id, ambiguous_formats in formats_dict.items(): ambiguous_id = len(ambiguous_formats) > 1 for i, fmt in enumerate(ambiguous_formats): if ambiguous_id: fmt['format_id'] = f'{format_id}-{i}' # Ensure there is no conflict between id and ext in format selection # See https://github.com/yt-dlp/yt-dlp/issues/1282 if fmt['format_id'] != fmt['ext'] and fmt['format_id'] in common_exts: fmt['format_id'] = 'f{}'.format(fmt['format_id']) if fmt.get('format') is None: fmt['format'] = '{id} - {res}{note}'.format( id=fmt['format_id'], res=self.format_resolution(fmt), note=format_field(fmt, 'format_note', ' (%s)'), ) if self.params.get('check_formats') is True: formats = LazyList(self._check_formats(formats[::-1]), reverse=True) if not formats or formats[0] is not info_dict: # only set the 'formats' fields if the original info_dict lists them # otherwise we end up with a circular reference, the first (and unique) # element in the 'formats' field in info_dict is info_dict itself, # which can't be 
exported to json info_dict['formats'] = formats info_dict, _ = self.pre_process(info_dict) if self._match_entry(info_dict, incomplete=self._format_fields) is not None: return info_dict self.post_extract(info_dict) info_dict, _ = self.pre_process(info_dict, 'after_filter') # The pre-processors may have modified the formats formats = self._get_formats(info_dict) list_only = self.params.get('simulate') == 'list_only' interactive_format_selection = not list_only and self.format_selector == '-' if self.params.get('list_thumbnails'): self.list_thumbnails(info_dict) if self.params.get('listsubtitles'): if 'automatic_captions' in info_dict: self.list_subtitles( info_dict['id'], automatic_captions, 'automatic captions') self.list_subtitles(info_dict['id'], subtitles, 'subtitles') if self.params.get('listformats') or interactive_format_selection: self.list_formats(info_dict) if list_only: # Without this printing, -F --print-json will not work self.__forced_printings(info_dict) return info_dict format_selector = self.format_selector while True: if interactive_format_selection: req_format = input(self._format_screen('\nEnter format selector ', self.Styles.EMPHASIS) + '(Press ENTER for default, or Ctrl+C to quit)' + self._format_screen(': ', self.Styles.EMPHASIS)) try: format_selector = self.build_format_selector(req_format) if req_format else None except SyntaxError as err: self.report_error(err, tb=False, is_error=False) continue if format_selector is None: req_format = self._default_format_spec(info_dict) self.write_debug(f'Default format spec: {req_format}') format_selector = self.build_format_selector(req_format) formats_to_download = self._select_formats(formats, format_selector) if interactive_format_selection and not formats_to_download: self.report_error('Requested format is not available', tb=False, is_error=False) continue break if not formats_to_download: if not self.params.get('ignore_no_formats_error'): raise ExtractorError( 'Requested format is not available. Use --list-formats for a list of available formats', expected=True, video_id=info_dict['id'], ie=info_dict['extractor']) self.report_warning('Requested format is not available') # Process what we can, even without any available formats. formats_to_download = [{}] requested_ranges = tuple(self.params.get('download_ranges', lambda *_: [{}])(info_dict, self)) best_format, downloaded_formats = formats_to_download[-1], [] if download: if best_format and requested_ranges: def to_screen(*msg): self.to_screen(f'[info] {info_dict["id"]}: {" ".join(", ".join(variadic(m)) for m in msg)}') to_screen(f'Downloading {len(formats_to_download)} format(s):', (f['format_id'] for f in formats_to_download)) if requested_ranges != ({}, ): to_screen(f'Downloading {len(requested_ranges)} time ranges:', (f'{c["start_time"]:.1f}-{c["end_time"]:.1f}' for c in requested_ranges)) max_downloads_reached = False for fmt, chapter in itertools.product(formats_to_download, requested_ranges): new_info = self._copy_infodict(info_dict) new_info.update(fmt) offset, duration = info_dict.get('section_start') or 0, info_dict.get('duration') or float('inf') end_time = offset + min(chapter.get('end_time', duration), duration) # duration may not be accurate. 
So allow deviations <1sec if end_time == float('inf') or end_time > offset + duration + 1: end_time = None if chapter or offset: new_info.update({ 'section_start': offset + chapter.get('start_time', 0), 'section_end': end_time, 'section_title': chapter.get('title'), 'section_number': chapter.get('index'), }) downloaded_formats.append(new_info) try: self.process_info(new_info) except MaxDownloadsReached: max_downloads_reached = True self._raise_pending_errors(new_info) # Remove copied info for key, val in tuple(new_info.items()): if info_dict.get(key) == val: new_info.pop(key) if max_downloads_reached: break write_archive = {f.get('__write_download_archive', False) for f in downloaded_formats} assert write_archive.issubset({True, False, 'ignore'}) if True in write_archive and False not in write_archive: self.record_download_archive(info_dict) info_dict['requested_downloads'] = downloaded_formats info_dict = self.run_all_pps('after_video', info_dict) if max_downloads_reached: raise MaxDownloadsReached # We update the info dict with the selected best quality format (backwards compatibility) info_dict.update(best_format) return info_dict def process_subtitles(self, video_id, normal_subtitles, automatic_captions): """Select the requested subtitles and their format""" available_subs, normal_sub_langs = {}, [] if normal_subtitles and self.params.get('writesubtitles'): available_subs.update(normal_subtitles) normal_sub_langs = tuple(normal_subtitles.keys()) if automatic_captions and self.params.get('writeautomaticsub'): for lang, cap_info in automatic_captions.items(): if lang not in available_subs: available_subs[lang] = cap_info if not available_subs or ( not self.params.get('writesubtitles') and not self.params.get('writeautomaticsub')): return None all_sub_langs = tuple(available_subs.keys()) if self.params.get('allsubtitles', False): requested_langs = all_sub_langs elif self.params.get('subtitleslangs', False): try: requested_langs = orderedSet_from_options( self.params.get('subtitleslangs'), {'all': all_sub_langs}, use_regex=True) except re.error as e: raise ValueError(f'Wrong regex for subtitleslangs: {e.pattern}') else: requested_langs = LazyList(itertools.chain( ['en'] if 'en' in normal_sub_langs else [], filter(lambda f: f.startswith('en'), normal_sub_langs), ['en'] if 'en' in all_sub_langs else [], filter(lambda f: f.startswith('en'), all_sub_langs), normal_sub_langs, all_sub_langs, ))[:1] if requested_langs: self.to_screen(f'[info] {video_id}: Downloading subtitles: {", ".join(requested_langs)}') formats_query = self.params.get('subtitlesformat', 'best') formats_preference = formats_query.split('/') if formats_query else [] subs = {} for lang in requested_langs: formats = available_subs.get(lang) if formats is None: self.report_warning(f'{lang} subtitles not available for {video_id}') continue for ext in formats_preference: if ext == 'best': f = formats[-1] break matches = list(filter(lambda f: f['ext'] == ext, formats)) if matches: f = matches[-1] break else: f = formats[-1] self.report_warning( 'No subtitle format found matching "{}" for language {}, ' 'using {}. 
Use --list-subs for a list of available subtitles'.format(formats_query, lang, f['ext'])) subs[lang] = f return subs def _forceprint(self, key, info_dict): if info_dict is None: return info_copy = info_dict.copy() info_copy.setdefault('filename', self.prepare_filename(info_dict)) if info_dict.get('requested_formats') is not None: # For RTMP URLs, also include the playpath info_copy['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats']) elif info_dict.get('url'): info_copy['urls'] = info_dict['url'] + info_dict.get('play_path', '') info_copy['formats_table'] = self.render_formats_table(info_dict) info_copy['thumbnails_table'] = self.render_thumbnails_table(info_dict) info_copy['subtitles_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('subtitles')) info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions')) def format_tmpl(tmpl): mobj = re.fullmatch(r'([\w.:,]|-\d|(?P<dict>{([\w.:,]|-\d)+}))+=?', tmpl) if not mobj: return tmpl fmt = '%({})s' if tmpl.startswith('{'): tmpl, fmt = f'.{tmpl}', '%({})j' if tmpl.endswith('='): tmpl, fmt = tmpl[:-1], '{0} = %({0})#j' return '\n'.join(map(fmt.format, [tmpl] if mobj.group('dict') else tmpl.split(','))) for tmpl in self.params['forceprint'].get(key, []): self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy)) for tmpl, file_tmpl in self.params['print_to_file'].get(key, []): filename = self.prepare_filename(info_dict, outtmpl=file_tmpl) tmpl = format_tmpl(tmpl) self.to_screen(f'[info] Writing {tmpl!r} to: {filename}') if self._ensure_dir_exists(filename): with open(filename, 'a', encoding='utf-8', newline='') as f: f.write(self.evaluate_outtmpl(tmpl, info_copy) + os.linesep) return info_copy def __forced_printings(self, info_dict, filename=None, incomplete=True): if (self.params.get('forcejson') or self.params['forceprint'].get('video') or self.params['print_to_file'].get('video')): self.post_extract(info_dict) if filename: info_dict['filename'] = filename info_copy = self._forceprint('video', info_dict) def print_field(field, actual_field=None, optional=False): if actual_field is None: actual_field = field if self.params.get(f'force{field}') and ( info_copy.get(field) is not None or (not optional and not incomplete)): self.to_stdout(info_copy[actual_field]) print_field('title') print_field('id') print_field('url', 'urls') print_field('thumbnail', optional=True) print_field('description', optional=True) print_field('filename') if self.params.get('forceduration') and info_copy.get('duration') is not None: self.to_stdout(formatSeconds(info_copy['duration'])) print_field('format') if self.params.get('forcejson'): self.to_stdout(json.dumps(self.sanitize_info(info_dict))) def dl(self, name, info, subtitle=False, test=False): if not info.get('url'): self.raise_no_formats(info, True) if test: verbose = self.params.get('verbose') quiet = self.params.get('quiet') or not verbose params = { 'test': True, 'quiet': quiet, 'verbose': verbose, 'noprogress': quiet, 'nopart': True, 'skip_unavailable_fragments': False, 'keep_fragments': False, 'overwrites': True, '_no_ytdl_file': True, } else: params = self.params fd = get_suitable_downloader(info, params, to_stdout=(name == '-'))(self, params) if not test: for ph in self._progress_hooks: fd.add_progress_hook(ph) urls = '", "'.join( (f['url'].split(',')[0] + ',' if f['url'].startswith('data:') else f['url']) for f in info.get('requested_formats', []) or [info]) 
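# For "data:" URLs, only the portion before the first comma (the media
# type and encoding, not the payload itself) is kept above, so the
# verbose debug line below is not flooded with base64 data.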
self.write_debug(f'Invoking {fd.FD_NAME} downloader on "{urls}"') # Note: Ideally info should be deep-copied so that hooks cannot modify it. # But it may contain objects that are not deep-copyable new_info = self._copy_infodict(info) if new_info.get('http_headers') is None: new_info['http_headers'] = self._calc_headers(new_info) return fd.download(name, new_info, subtitle) def existing_file(self, filepaths, *, default_overwrite=True): existing_files = list(filter(os.path.exists, orderedSet(filepaths))) if existing_files and not self.params.get('overwrites', default_overwrite): return existing_files[0] for file in existing_files: self.report_file_delete(file) os.remove(file) return None @_catch_unsafe_extension_error def process_info(self, info_dict): """Process a single resolved IE result. (Modifies it in-place)""" assert info_dict.get('_type', 'video') == 'video' original_infodict = info_dict if 'format' not in info_dict and 'ext' in info_dict: info_dict['format'] = info_dict['ext'] if self._match_entry(info_dict) is not None: info_dict['__write_download_archive'] = 'ignore' return # Does nothing under normal operation - for backward compatibility of process_info self.post_extract(info_dict) def replace_info_dict(new_info): nonlocal info_dict if new_info == info_dict: return info_dict.clear() info_dict.update(new_info) new_info, _ = self.pre_process(info_dict, 'video') replace_info_dict(new_info) self._num_downloads += 1 # info_dict['_filename'] needs to be set for backward compatibility info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True) temp_filename = self.prepare_filename(info_dict, 'temp') files_to_move = {} # Forced printings self.__forced_printings(info_dict, full_filename, incomplete=('format' not in info_dict)) def check_max_downloads(): if self._num_downloads >= float(self.params.get('max_downloads') or 'inf'): raise MaxDownloadsReached if self.params.get('simulate'): info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') check_max_downloads() return if full_filename is None: return if not self._ensure_dir_exists(encodeFilename(full_filename)): return if not self._ensure_dir_exists(encodeFilename(temp_filename)): return if self._write_description('video', info_dict, self.prepare_filename(info_dict, 'description')) is None: return sub_files = self._write_subtitles(info_dict, temp_filename) if sub_files is None: return files_to_move.update(dict(sub_files)) thumb_files = self._write_thumbnails( 'video', info_dict, temp_filename, self.prepare_filename(info_dict, 'thumbnail')) if thumb_files is None: return files_to_move.update(dict(thumb_files)) infofn = self.prepare_filename(info_dict, 'infojson') _infojson_written = self._write_info_json('video', info_dict, infofn) if _infojson_written: info_dict['infojson_filename'] = infofn # For backward compatibility, even though it was a private field info_dict['__infojson_filename'] = infofn elif _infojson_written is None: return # Note: Annotations are deprecated annofn = None if self.params.get('writeannotations', False): annofn = self.prepare_filename(info_dict, 'annotation') if annofn: if not self._ensure_dir_exists(encodeFilename(annofn)): return if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)): self.to_screen('[info] Video annotations are already present') elif not info_dict.get('annotations'): self.report_warning('There are no annotations to write.') else: try: self.to_screen('[info] Writing video annotations to: ' + annofn) with 
open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile: annofile.write(info_dict['annotations']) except (KeyError, TypeError): self.report_warning('There are no annotations to write.') except OSError: self.report_error('Cannot write annotations file: ' + annofn) return # Write internet shortcut files def _write_link_file(link_type): url = try_get(info_dict['webpage_url'], iri_to_uri) if not url: self.report_warning( f'Cannot write internet shortcut file because the actual URL of "{info_dict["webpage_url"]}" is unknown') return True linkfn = replace_extension(self.prepare_filename(info_dict, 'link'), link_type, info_dict.get('ext')) if not self._ensure_dir_exists(encodeFilename(linkfn)): return False if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)): self.to_screen(f'[info] Internet shortcut (.{link_type}) is already present') return True try: self.to_screen(f'[info] Writing internet shortcut (.{link_type}) to: {linkfn}') with open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline='\r\n' if link_type == 'url' else '\n') as linkfile: template_vars = {'url': url} if link_type == 'desktop': template_vars['filename'] = linkfn[:-(len(link_type) + 1)] linkfile.write(LINK_TEMPLATES[link_type] % template_vars) except OSError: self.report_error(f'Cannot write internet shortcut {linkfn}') return False return True write_links = { 'url': self.params.get('writeurllink'), 'webloc': self.params.get('writewebloclink'), 'desktop': self.params.get('writedesktoplink'), } if self.params.get('writelink'): link_type = ('webloc' if sys.platform == 'darwin' else 'desktop' if sys.platform.startswith('linux') else 'url') write_links[link_type] = True if any(should_write and not _write_link_file(link_type) for link_type, should_write in write_links.items()): return new_info, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move) replace_info_dict(new_info) if self.params.get('skip_download'): info_dict['filepath'] = temp_filename info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) info_dict['__files_to_move'] = files_to_move replace_info_dict(self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)) info_dict['__write_download_archive'] = self.params.get('force_write_download_archive') else: # Download info_dict.setdefault('__postprocessors', []) try: def existing_video_file(*filepaths): ext = info_dict.get('ext') converted = lambda file: replace_extension(file, self.params.get('final_ext') or ext, ext) file = self.existing_file(itertools.chain(*zip(map(converted, filepaths), filepaths)), default_overwrite=False) if file: info_dict['ext'] = os.path.splitext(file)[1][1:] return file fd, success = None, True if info_dict.get('protocol') or info_dict.get('url'): fd = get_suitable_downloader(info_dict, self.params, to_stdout=temp_filename == '-') if fd != FFmpegFD and 'no-direct-merge' not in self.params['compat_opts'] and ( info_dict.get('section_start') or info_dict.get('section_end')): msg = ('This format cannot be partially downloaded' if FFmpegFD.available() else 'You have requested downloading the video partially, but ffmpeg is not installed') self.report_error(f'{msg}. 
Aborting') return if info_dict.get('requested_formats') is not None: old_ext = info_dict['ext'] if self.params.get('merge_output_format') is None: if (info_dict['ext'] == 'webm' and info_dict.get('thumbnails') # check with type instead of pp_key, __name__, or isinstance # since we don't want any custom PPs to trigger this and any(type(pp) == EmbedThumbnailPP for pp in self._pps['post_process'])): # noqa: E721 info_dict['ext'] = 'mkv' self.report_warning( 'webm doesn\'t support embedding a thumbnail, mkv will be used') new_ext = info_dict['ext'] def correct_ext(filename, ext=new_ext): if filename == '-': return filename filename_real_ext = os.path.splitext(filename)[1][1:] filename_wo_ext = ( os.path.splitext(filename)[0] if filename_real_ext in (old_ext, new_ext) else filename) return f'{filename_wo_ext}.{ext}' # Ensure filename always has a correct extension for successful merge full_filename = correct_ext(full_filename) temp_filename = correct_ext(temp_filename) dl_filename = existing_video_file(full_filename, temp_filename) info_dict['__real_download'] = False # NOTE: Copy so that original format dicts are not modified info_dict['requested_formats'] = list(map(dict, info_dict['requested_formats'])) merger = FFmpegMergerPP(self) downloaded = [] if dl_filename is not None: self.report_file_already_downloaded(dl_filename) elif fd: for f in info_dict['requested_formats'] if fd != FFmpegFD else []: f['filepath'] = fname = prepend_extension( correct_ext(temp_filename, info_dict['ext']), 'f{}'.format(f['format_id']), info_dict['ext']) downloaded.append(fname) info_dict['url'] = '\n'.join(f['url'] for f in info_dict['requested_formats']) success, real_download = self.dl(temp_filename, info_dict) info_dict['__real_download'] = real_download else: if self.params.get('allow_unplayable_formats'): self.report_warning( 'You have requested merging of multiple formats ' 'while also allowing unplayable formats to be downloaded. ' 'The formats won\'t be merged to prevent data corruption.') elif not merger.available: msg = 'You have requested merging of multiple formats but ffmpeg is not installed' if not self.params.get('ignoreerrors'): self.report_error(f'{msg}. Aborting due to --abort-on-error') return self.report_warning(f'{msg}. The formats won\'t be merged') if temp_filename == '-': reason = ('using a downloader other than ffmpeg' if FFmpegFD.can_merge_formats(info_dict, self.params) else 'but the formats are incompatible for simultaneous download' if merger.available else 'but ffmpeg is not installed') self.report_warning( f'You have requested downloading multiple formats to stdout {reason}. 
' 'The formats will be streamed one after the other') fname = temp_filename for f in info_dict['requested_formats']: new_info = dict(info_dict) del new_info['requested_formats'] new_info.update(f) if temp_filename != '-': fname = prepend_extension( correct_ext(temp_filename, new_info['ext']), 'f{}'.format(f['format_id']), new_info['ext']) if not self._ensure_dir_exists(fname): return f['filepath'] = fname downloaded.append(fname) partial_success, real_download = self.dl(fname, new_info) info_dict['__real_download'] = info_dict['__real_download'] or real_download success = success and partial_success if downloaded and merger.available and not self.params.get('allow_unplayable_formats'): info_dict['__postprocessors'].append(merger) info_dict['__files_to_merge'] = downloaded # Even if there were no downloads, it is being merged only now info_dict['__real_download'] = True else: for file in downloaded: files_to_move[file] = None else: # Just a single file dl_filename = existing_video_file(full_filename, temp_filename) if dl_filename is None or dl_filename == temp_filename: # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part. # So we should try to resume the download success, real_download = self.dl(temp_filename, info_dict) info_dict['__real_download'] = real_download else: self.report_file_already_downloaded(dl_filename) dl_filename = dl_filename or temp_filename info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename))) except network_exceptions as err: self.report_error(f'unable to download video data: {err}') return except OSError as err: raise UnavailableVideoError(err) except ContentTooShortError as err: self.report_error(f'content too short (expected {err.expected} bytes and served {err.downloaded})') return self._raise_pending_errors(info_dict) if success and full_filename != '-': def fixup(): do_fixup = True fixup_policy = self.params.get('fixup') vid = info_dict['id'] if fixup_policy in ('ignore', 'never'): return elif fixup_policy == 'warn': do_fixup = 'warn' elif fixup_policy != 'force': assert fixup_policy in ('detect_or_warn', None) if not info_dict.get('__real_download'): do_fixup = False def ffmpeg_fixup(cndn, msg, cls): if not (do_fixup and cndn): return elif do_fixup == 'warn': self.report_warning(f'{vid}: {msg}') return pp = cls(self) if pp.available: info_dict['__postprocessors'].append(pp) else: self.report_warning(f'{vid}: {msg}. Install ffmpeg to fix this automatically') stretched_ratio = info_dict.get('stretched_ratio') ffmpeg_fixup(stretched_ratio not in (1, None), f'Non-uniform pixel ratio {stretched_ratio}', FFmpegFixupStretchedPP) downloader = get_suitable_downloader(info_dict, self.params) if 'protocol' in info_dict else None downloader = downloader.FD_NAME if downloader else None ext = info_dict.get('ext') postprocessed_by_ffmpeg = info_dict.get('requested_formats') or any(( isinstance(pp, FFmpegVideoConvertorPP) and resolve_recode_mapping(ext, pp.mapping)[0] not in (ext, None) ) for pp in self._pps['post_process']) if not postprocessed_by_ffmpeg: ffmpeg_fixup(fd != FFmpegFD and ext == 'm4a' and info_dict.get('container') == 'm4a_dash', 'writing DASH m4a. 
Only some players support this container', FFmpegFixupM4aPP) ffmpeg_fixup(downloader == 'hlsnative' and not self.params.get('hls_use_mpegts') or info_dict.get('is_live') and self.params.get('hls_use_mpegts') is None, 'Possible MPEG-TS in MP4 container or malformed AAC timestamps', FFmpegFixupM3u8PP) ffmpeg_fixup(downloader == 'dashsegments' and (info_dict.get('is_live') or info_dict.get('is_dash_periods')), 'Possible duplicate MOOV atoms', FFmpegFixupDuplicateMoovPP) ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed timestamps detected', FFmpegFixupTimestampPP) ffmpeg_fixup(downloader == 'web_socket_fragment', 'Malformed duration detected', FFmpegFixupDurationPP) fixup() try: replace_info_dict(self.post_process(dl_filename, info_dict, files_to_move)) except PostProcessingError as err: self.report_error(f'Postprocessing: {err}') return try: for ph in self._post_hooks: ph(info_dict['filepath']) except Exception as err: self.report_error(f'post hooks: {err}') return info_dict['__write_download_archive'] = True assert info_dict is original_infodict # Make sure the info_dict was modified in-place if self.params.get('force_write_download_archive'): info_dict['__write_download_archive'] = True check_max_downloads() def __download_wrapper(self, func): @functools.wraps(func) def wrapper(*args, **kwargs): try: res = func(*args, **kwargs) except UnavailableVideoError as e: self.report_error(e) except DownloadCancelled as e: self.to_screen(f'[info] {e}') if not self.params.get('break_per_url'): raise self._num_downloads = 0 else: if self.params.get('dump_single_json', False): self.post_extract(res) self.to_stdout(json.dumps(self.sanitize_info(res))) return wrapper def download(self, url_list): """Download a given list of URLs.""" url_list = variadic(url_list) # Passing a single URL is a common mistake outtmpl = self.params['outtmpl']['default'] if (len(url_list) > 1 and outtmpl != '-' and '%' not in outtmpl and self.params.get('max_downloads') != 1): raise SameFileError(outtmpl) for url in url_list: self.__download_wrapper(self.extract_info)( url, force_generic_extractor=self.params.get('force_generic_extractor', False)) return self._download_retcode def download_with_info_file(self, info_filename): with contextlib.closing(fileinput.FileInput( [info_filename], mode='r', openhook=fileinput.hook_encoded('utf-8'))) as f: # FileInput doesn't have a read method, we can't call json.load infos = [self.sanitize_info(info, self.params.get('clean_infojson', True)) for info in variadic(json.loads('\n'.join(f)))] for info in infos: try: self.__download_wrapper(self.process_ie_result)(info, download=True) except (DownloadError, EntryNotInPlaylist, ReExtractInfo) as e: if not isinstance(e, EntryNotInPlaylist): self.to_stderr('\r') webpage_url = info.get('webpage_url') if webpage_url is None: raise self.report_warning(f'The info failed to download: {e}; trying with URL {webpage_url}') self.download([webpage_url]) except ExtractorError as e: self.report_error(e) return self._download_retcode @staticmethod def sanitize_info(info_dict, remove_private_keys=False): """ Sanitize the infodict for converting to json """ if info_dict is None: return info_dict info_dict.setdefault('epoch', int(time.time())) info_dict.setdefault('_type', 'video') info_dict.setdefault('_version', { 'version': __version__, 'current_git_head': current_git_head(), 'release_git_head': RELEASE_GIT_HEAD, 'repository': ORIGIN, }) if remove_private_keys: reject = lambda k, v: v is None or k.startswith('__') or k in { 'requested_downloads', 
'requested_formats', 'requested_subtitles', 'requested_entries', 'entries', 'filepath', '_filename', 'filename', 'infojson_filename', 'original_url', 'playlist_autonumber', } else: reject = lambda k, v: False def filter_fn(obj): if isinstance(obj, dict): return {k: filter_fn(v) for k, v in obj.items() if not reject(k, v)} elif isinstance(obj, (list, tuple, set, LazyList)): return list(map(filter_fn, obj)) elif obj is None or isinstance(obj, (str, int, float, bool)): return obj else: return repr(obj) return filter_fn(info_dict) @staticmethod def filter_requested_info(info_dict, actually_filter=True): """ Alias of sanitize_info for backward compatibility """ return YoutubeDL.sanitize_info(info_dict, actually_filter) def _delete_downloaded_files(self, *files_to_delete, info={}, msg=None): for filename in set(filter(None, files_to_delete)): if msg: self.to_screen(msg % filename) try: os.remove(filename) except OSError: self.report_warning(f'Unable to delete file {filename}') if filename in info.get('__files_to_move', []): # NB: Delete even if None del info['__files_to_move'][filename] @staticmethod def post_extract(info_dict): def actual_post_extract(info_dict): if info_dict.get('_type') in ('playlist', 'multi_video'): for video_dict in info_dict.get('entries', {}): actual_post_extract(video_dict or {}) return post_extractor = info_dict.pop('__post_extractor', None) or dict info_dict.update(post_extractor()) actual_post_extract(info_dict or {}) def run_pp(self, pp, infodict): files_to_delete = [] if '__files_to_move' not in infodict: infodict['__files_to_move'] = {} try: files_to_delete, infodict = pp.run(infodict) except PostProcessingError as e: # Must be True and not 'only_download' if self.params.get('ignoreerrors') is True: self.report_error(e) return infodict raise if not files_to_delete: return infodict if self.params.get('keepvideo', False): for f in files_to_delete: infodict['__files_to_move'].setdefault(f, '') else: self._delete_downloaded_files( *files_to_delete, info=infodict, msg='Deleting original file %s (pass -k to keep)') return infodict def run_all_pps(self, key, info, *, additional_pps=None): if key != 'video': self._forceprint(key, info) for pp in (additional_pps or []) + self._pps[key]: info = self.run_pp(pp, info) return info def pre_process(self, ie_info, key='pre_process', files_to_move=None): info = dict(ie_info) info['__files_to_move'] = files_to_move or {} try: info = self.run_all_pps(key, info) except PostProcessingError as err: msg = f'Preprocessing: {err}' info.setdefault('__pending_error', msg) self.report_error(msg, is_error=False) return info, info.pop('__files_to_move', None) def post_process(self, filename, info, files_to_move=None): """Run all the postprocessors on the given file.""" info['filepath'] = filename info['__files_to_move'] = files_to_move or {} info = self.run_all_pps('post_process', info, additional_pps=info.get('__postprocessors')) info = self.run_pp(MoveFilesAfterDownloadPP(self), info) del info['__files_to_move'] return self.run_all_pps('after_move', info) def _make_archive_id(self, info_dict): video_id = info_dict.get('id') if not video_id: return # Future-proof against any change in case # and backwards compatibility with prior versions extractor = info_dict.get('extractor_key') or info_dict.get('ie_key') # key in a playlist if extractor is None: url = str_or_none(info_dict.get('url')) if not url: return # Try to find matching extractor for the URL and take its ie_key for ie_key, ie in self._ies.items(): if ie.suitable(url): extractor = 
ie_key break else: return return make_archive_id(extractor, video_id) def in_download_archive(self, info_dict): if not self.archive: return False vid_ids = [self._make_archive_id(info_dict)] vid_ids.extend(info_dict.get('_old_archive_ids') or []) return any(id_ in self.archive for id_ in vid_ids) def record_download_archive(self, info_dict): fn = self.params.get('download_archive') if fn is None: return vid_id = self._make_archive_id(info_dict) assert vid_id self.write_debug(f'Adding to archive: {vid_id}') if is_path_like(fn): with locked_file(fn, 'a', encoding='utf-8') as archive_file: archive_file.write(vid_id + '\n') self.archive.add(vid_id) @staticmethod def format_resolution(format, default='unknown'): if format.get('vcodec') == 'none' and format.get('acodec') != 'none': return 'audio only' if format.get('resolution') is not None: return format['resolution'] if format.get('width') and format.get('height'): return '%dx%d' % (format['width'], format['height']) elif format.get('height'): return '{}p'.format(format['height']) elif format.get('width'): return '%dx?' % format['width'] return default def _list_format_headers(self, *headers): if self.params.get('listformats_table', True) is not False: return [self._format_out(header, self.Styles.HEADERS) for header in headers] return headers def _format_note(self, fdict): res = '' if fdict.get('ext') in ['f4f', 'f4m']: res += '(unsupported)' if fdict.get('language'): if res: res += ' ' res += '[{}]'.format(fdict['language']) if fdict.get('format_note') is not None: if res: res += ' ' res += fdict['format_note'] if fdict.get('tbr') is not None: if res: res += ', ' res += '%4dk' % fdict['tbr'] if fdict.get('container') is not None: if res: res += ', ' res += '{} container'.format(fdict['container']) if (fdict.get('vcodec') is not None and fdict.get('vcodec') != 'none'): if res: res += ', ' res += fdict['vcodec'] if fdict.get('vbr') is not None: res += '@' elif fdict.get('vbr') is not None and fdict.get('abr') is not None: res += 'video@' if fdict.get('vbr') is not None: res += '%4dk' % fdict['vbr'] if fdict.get('fps') is not None: if res: res += ', ' res += '{}fps'.format(fdict['fps']) if fdict.get('acodec') is not None: if res: res += ', ' if fdict['acodec'] == 'none': res += 'video only' else: res += '%-5s' % fdict['acodec'] elif fdict.get('abr') is not None: if res: res += ', ' res += 'audio' if fdict.get('abr') is not None: res += '@%3dk' % fdict['abr'] if fdict.get('asr') is not None: res += ' (%5dHz)' % fdict['asr'] if fdict.get('filesize') is not None: if res: res += ', ' res += format_bytes(fdict['filesize']) elif fdict.get('filesize_approx') is not None: if res: res += ', ' res += '~' + format_bytes(fdict['filesize_approx']) return res def _get_formats(self, info_dict): if info_dict.get('formats') is None: if info_dict.get('url') and info_dict.get('_type', 'video') == 'video': return [info_dict] return [] return info_dict['formats'] def render_formats_table(self, info_dict): formats = self._get_formats(info_dict) if not formats: return if self.params.get('listformats_table', True) is False: table = [ [ format_field(f, 'format_id'), format_field(f, 'ext'), self.format_resolution(f), self._format_note(f), ] for f in formats if (f.get('preference') or 0) >= -1000] return render_table(['format code', 'extension', 'resolution', 'note'], table, extra_gap=1) def simplified_codec(f, field): assert field in ('acodec', 'vcodec') codec = f.get(field) if not codec: return 'unknown' elif codec != 'none': return 
'.'.join(codec.split('.')[:4]) if field == 'vcodec' and f.get('acodec') == 'none': return 'images' elif field == 'acodec' and f.get('vcodec') == 'none': return '' return self._format_out('audio only' if field == 'vcodec' else 'video only', self.Styles.SUPPRESS) delim = self._format_out('\u2502', self.Styles.DELIM, '|', test_encoding=True) table = [ [ self._format_out(format_field(f, 'format_id'), self.Styles.ID), format_field(f, 'ext'), format_field(f, func=self.format_resolution, ignore=('audio only', 'images')), format_field(f, 'fps', '\t%d', func=round), format_field(f, 'dynamic_range', '%s', ignore=(None, 'SDR')).replace('HDR', ''), format_field(f, 'audio_channels', '\t%s'), delim, ( format_field(f, 'filesize', ' \t%s', func=format_bytes) or format_field(f, 'filesize_approx', '≈\t%s', func=format_bytes) or format_field(filesize_from_tbr(f.get('tbr'), info_dict.get('duration')), None, self._format_out('~\t%s', self.Styles.SUPPRESS), func=format_bytes)), format_field(f, 'tbr', '\t%dk', func=round), shorten_protocol_name(f.get('protocol', '')), delim, simplified_codec(f, 'vcodec'), format_field(f, 'vbr', '\t%dk', func=round), simplified_codec(f, 'acodec'), format_field(f, 'abr', '\t%dk', func=round), format_field(f, 'asr', '\t%s', func=format_decimal_suffix), join_nonempty(format_field(f, 'language', '[%s]'), join_nonempty( self._format_out('UNSUPPORTED', self.Styles.BAD_FORMAT) if f.get('ext') in ('f4f', 'f4m') else None, (self._format_out('Maybe DRM', self.Styles.WARNING) if f.get('has_drm') == 'maybe' else self._format_out('DRM', self.Styles.BAD_FORMAT) if f.get('has_drm') else None), format_field(f, 'format_note'), format_field(f, 'container', ignore=(None, f.get('ext'))), delim=', '), delim=' '), ] for f in formats if f.get('preference') is None or f['preference'] >= -1000] header_line = self._list_format_headers( 'ID', 'EXT', 'RESOLUTION', '\tFPS', 'HDR', 'CH', delim, '\tFILESIZE', '\tTBR', 'PROTO', delim, 'VCODEC', '\tVBR', 'ACODEC', '\tABR', '\tASR', 'MORE INFO') return render_table( header_line, table, hide_empty=True, delim=self._format_out('\u2500', self.Styles.DELIM, '-', test_encoding=True)) def render_thumbnails_table(self, info_dict): thumbnails = list(info_dict.get('thumbnails') or []) if not thumbnails: return None return render_table( self._list_format_headers('ID', 'Width', 'Height', 'URL'), [[t.get('id'), t.get('width') or 'unknown', t.get('height') or 'unknown', t['url']] for t in thumbnails]) def render_subtitles_table(self, video_id, subtitles): def _row(lang, formats): exts, names = zip(*((f['ext'], f.get('name') or 'unknown') for f in reversed(formats))) if len(set(names)) == 1: names = [] if names[0] == 'unknown' else names[:1] return [lang, ', '.join(names), ', '.join(exts)] if not subtitles: return None return render_table( self._list_format_headers('Language', 'Name', 'Formats'), [_row(lang, formats) for lang, formats in subtitles.items()], hide_empty=True) def __list_table(self, video_id, name, func, *args): table = func(*args) if not table: self.to_screen(f'{video_id} has no {name}') return self.to_screen(f'[info] Available {name} for {video_id}:') self.to_stdout(table) def list_formats(self, info_dict): self.__list_table(info_dict['id'], 'formats', self.render_formats_table, info_dict) def list_thumbnails(self, info_dict): self.__list_table(info_dict['id'], 'thumbnails', self.render_thumbnails_table, info_dict) def list_subtitles(self, video_id, subtitles, name='subtitles'): self.__list_table(video_id, name, self.render_subtitles_table, video_id, subtitles) 
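# Usage sketch for the renderers above (illustrative only; the URL is a
# placeholder and no download is performed):
#
#   ydl = YoutubeDL()
#   info = ydl.extract_info('https://example.com/video', download=False)
#   table = ydl.render_formats_table(info)  # None when there are no formats
#   if table:
#       ydl.to_stdout(table)
#
# The list_* wrappers route through __list_table, which prints an
# "[info] Available ..." header, or a "has no ..." notice when the
# corresponding render_* helper returns a falsy table.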
def print_debug_header(self): if not self.params.get('verbose'): return from . import _IN_CLI # Must be delayed import # These imports can be slow. So import them only as needed from .extractor.extractors import _LAZY_LOADER from .extractor.extractors import ( _PLUGIN_CLASSES as plugin_ies, _PLUGIN_OVERRIDES as plugin_ie_overrides, ) def get_encoding(stream): ret = str(getattr(stream, 'encoding', f'missing ({type(stream).__name__})')) additional_info = [] if os.environ.get('TERM', '').lower() == 'dumb': additional_info.append('dumb') if not supports_terminal_sequences(stream): from .utils import WINDOWS_VT_MODE # Must be imported locally additional_info.append('No VT' if WINDOWS_VT_MODE is False else 'No ANSI') if additional_info: ret = f'{ret} ({",".join(additional_info)})' return ret encoding_str = 'Encodings: locale {}, fs {}, pref {}, {}'.format( locale.getpreferredencoding(), sys.getfilesystemencoding(), self.get_encoding(), ', '.join( f'{key} {get_encoding(stream)}' for key, stream in self._out_files.items_ if stream is not None and key != 'console'), ) logger = self.params.get('logger') if logger: write_debug = lambda msg: logger.debug(f'[debug] {msg}') write_debug(encoding_str) else: write_string(f'[debug] {encoding_str}\n', encoding=None) write_debug = lambda msg: self._write_string(f'[debug] {msg}\n') source = detect_variant() if VARIANT not in (None, 'pip'): source += '*' klass = type(self) write_debug(join_nonempty( f'{REPOSITORY.rpartition("/")[2]} version', _make_label(ORIGIN, CHANNEL.partition('@')[2] or __version__, __version__), f'[{RELEASE_GIT_HEAD[:9]}]' if RELEASE_GIT_HEAD else '', '' if source == 'unknown' else f'({source})', '' if _IN_CLI else 'API' if klass == YoutubeDL else f'API:{self.__module__}.{klass.__qualname__}', delim=' ')) if not _IN_CLI: write_debug(f'params: {self.params}') if not _LAZY_LOADER: if os.environ.get('YTDLP_NO_LAZY_EXTRACTORS'): write_debug('Lazy loading extractors is forcibly disabled') else: write_debug('Lazy loading extractors is disabled') if self.params['compat_opts']: write_debug('Compatibility options: {}'.format(', '.join(self.params['compat_opts']))) if current_git_head(): write_debug(f'Git HEAD: {current_git_head()}') write_debug(system_identifier()) exe_versions, ffmpeg_features = FFmpegPostProcessor.get_versions_and_features(self) ffmpeg_features = {key for key, val in ffmpeg_features.items() if val} if ffmpeg_features: exe_versions['ffmpeg'] += ' ({})'.format(','.join(sorted(ffmpeg_features))) exe_versions['rtmpdump'] = rtmpdump_version() exe_versions['phantomjs'] = PhantomJSwrapper._version() exe_str = ', '.join( f'{exe} {v}' for exe, v in sorted(exe_versions.items()) if v ) or 'none' write_debug(f'exe versions: {exe_str}') from .compat.compat_utils import get_package_info from .dependencies import available_dependencies write_debug('Optional libraries: %s' % (', '.join(sorted({ join_nonempty(*get_package_info(m)) for m in available_dependencies.values() })) or 'none')) write_debug(f'Proxy map: {self.proxies}') write_debug(f'Request Handlers: {", ".join(rh.RH_NAME for rh in self._request_director.handlers.values())}') for plugin_type, plugins in {'Extractor': plugin_ies, 'Post-Processor': plugin_pps}.items(): display_list = ['{}{}'.format( klass.__name__, '' if klass.__name__ == name else f' as {name}') for name, klass in plugins.items()] if plugin_type == 'Extractor': display_list.extend(f'{plugins[-1].IE_NAME.partition("+")[2]} ({parent.__name__})' for parent, plugins in plugin_ie_overrides.items()) if not display_list: 
continue write_debug(f'{plugin_type} Plugins: {", ".join(sorted(display_list))}') plugin_dirs = plugin_directories() if plugin_dirs: write_debug(f'Plugin directories: {plugin_dirs}') # Not implemented if False and self.params.get('call_home'): ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode() write_debug(f'Public IP address: {ipaddr}') latest_version = self.urlopen( 'https://yt-dl.org/latest/version').read().decode() if version_tuple(latest_version) > version_tuple(__version__): self.report_warning( f'You are using an outdated version (newest version: {latest_version})! ' 'See https://yt-dl.org/update if you need help updating.') @functools.cached_property def proxies(self): """Global proxy configuration""" opts_proxy = self.params.get('proxy') if opts_proxy is not None: if opts_proxy == '': opts_proxy = '__noproxy__' proxies = {'all': opts_proxy} else: proxies = urllib.request.getproxies() # compat. Set HTTPS_PROXY to __noproxy__ to revert if 'http' in proxies and 'https' not in proxies: proxies['https'] = proxies['http'] return proxies @functools.cached_property def cookiejar(self): """Global cookiejar instance""" return load_cookies( self.params.get('cookiefile'), self.params.get('cookiesfrombrowser'), self) @property def _opener(self): """ Get a urllib OpenerDirector from the Urllib handler (deprecated). """ self.deprecation_warning('YoutubeDL._opener is deprecated, use YoutubeDL.urlopen()') handler = self._request_director.handlers['Urllib'] return handler._get_instance(cookiejar=self.cookiejar, proxies=self.proxies) def _get_available_impersonate_targets(self): # TODO(future): make available as public API return [ (target, rh.RH_NAME) for rh in self._request_director.handlers.values() if isinstance(rh, ImpersonateRequestHandler) for target in rh.supported_targets ] def _impersonate_target_available(self, target): # TODO(future): make available as public API return any( rh.is_supported_target(target) for rh in self._request_director.handlers.values() if isinstance(rh, ImpersonateRequestHandler)) def urlopen(self, req): """ Start an HTTP download """ if isinstance(req, str): req = Request(req) elif isinstance(req, urllib.request.Request): self.deprecation_warning( 'Passing a urllib.request.Request object to YoutubeDL.urlopen() is deprecated. ' 'Use yt_dlp.networking.common.Request instead.') req = urllib_req_to_req(req) assert isinstance(req, Request) # compat: Assume user:pass url params are basic auth url, basic_auth_header = extract_basic_auth(req.url) if basic_auth_header: req.headers['Authorization'] = basic_auth_header req.url = sanitize_url(url) clean_proxies(proxies=req.proxies, headers=req.headers) clean_headers(req.headers) try: return self._request_director.send(req) except NoSupportingHandlers as e: for ue in e.unsupported_errors: # FIXME: This depends on the order of errors. if not (ue.handler and ue.msg): continue if ue.handler.RH_KEY == 'Urllib' and 'unsupported url scheme: "file"' in ue.msg.lower(): raise RequestError( 'file:// URLs are disabled by default in yt-dlp for security reasons. 
' 'Use --enable-file-urls to enable at your own risk.', cause=ue) from ue if ( 'unsupported proxy type: "https"' in ue.msg.lower() and 'requests' not in self._request_director.handlers and 'curl_cffi' not in self._request_director.handlers ): raise RequestError( 'To use an HTTPS proxy for this request, one of the following dependencies needs to be installed: requests, curl_cffi') elif ( re.match(r'unsupported url scheme: "wss?"', ue.msg.lower()) and 'websockets' not in self._request_director.handlers ): raise RequestError( 'This request requires WebSocket support. ' 'Ensure one of the following dependencies are installed: websockets', cause=ue) from ue elif re.match(r'unsupported (?:extensions: impersonate|impersonate target)', ue.msg.lower()): raise RequestError( f'Impersonate target "{req.extensions["impersonate"]}" is not available.' f' See --list-impersonate-targets for available targets.' f' This request requires browser impersonation, however you may be missing dependencies' f' required to support this target.') raise except SSLError as e: if 'UNSAFE_LEGACY_RENEGOTIATION_DISABLED' in str(e): raise RequestError('UNSAFE_LEGACY_RENEGOTIATION_DISABLED: Try using --legacy-server-connect', cause=e) from e elif 'SSLV3_ALERT_HANDSHAKE_FAILURE' in str(e): raise RequestError( 'SSLV3_ALERT_HANDSHAKE_FAILURE: The server may not support the current cipher list. ' 'Try using --legacy-server-connect', cause=e) from e raise def build_request_director(self, handlers, preferences=None): logger = _YDLLogger(self) headers = self.params['http_headers'].copy() proxies = self.proxies.copy() clean_headers(headers) clean_proxies(proxies, headers) director = RequestDirector(logger=logger, verbose=self.params.get('debug_printtraffic')) for handler in handlers: director.add_handler(handler( logger=logger, headers=headers, cookiejar=self.cookiejar, proxies=proxies, prefer_system_certs='no-certifi' in self.params['compat_opts'], verify=not self.params.get('nocheckcertificate'), **traverse_obj(self.params, { 'verbose': 'debug_printtraffic', 'source_address': 'source_address', 'timeout': 'socket_timeout', 'legacy_ssl_support': 'legacyserverconnect', 'enable_file_urls': 'enable_file_urls', 'impersonate': 'impersonate', 'client_cert': { 'client_certificate': 'client_certificate', 'client_certificate_key': 'client_certificate_key', 'client_certificate_password': 'client_certificate_password', }, }), )) director.preferences.update(preferences or []) if 'prefer-legacy-http-handler' in self.params['compat_opts']: director.preferences.add(lambda rh, _: 500 if rh.RH_KEY == 'Urllib' else 0) return director @functools.cached_property def _request_director(self): return self.build_request_director(_REQUEST_HANDLERS.values(), _RH_PREFERENCES) def encode(self, s): if isinstance(s, bytes): return s # Already encoded try: return s.encode(self.get_encoding()) except UnicodeEncodeError as err: err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.' 
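# Re-raise the original UnicodeEncodeError below; only its "reason"
# text was augmented above, so the original traceback is preserved.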
raise def get_encoding(self): encoding = self.params.get('encoding') if encoding is None: encoding = preferredencoding() return encoding def _write_info_json(self, label, ie_result, infofn, overwrite=None): """ Write infojson and returns True = written, 'exists' = Already exists, False = skip, None = error """ if overwrite is None: overwrite = self.params.get('overwrites', True) if not self.params.get('writeinfojson'): return False elif not infofn: self.write_debug(f'Skipping writing {label} infojson') return False elif not self._ensure_dir_exists(infofn): return None elif not overwrite and os.path.exists(infofn): self.to_screen(f'[info] {label.title()} metadata is already present') return 'exists' self.to_screen(f'[info] Writing {label} metadata as JSON to: {infofn}') try: write_json_file(self.sanitize_info(ie_result, self.params.get('clean_infojson', True)), infofn) return True except OSError: self.report_error(f'Cannot write {label} metadata to JSON file {infofn}') return None def _write_description(self, label, ie_result, descfn): """ Write description and returns True = written, False = skip, None = error """ if not self.params.get('writedescription'): return False elif not descfn: self.write_debug(f'Skipping writing {label} description') return False elif not self._ensure_dir_exists(descfn): return None elif not self.params.get('overwrites', True) and os.path.exists(descfn): self.to_screen(f'[info] {label.title()} description is already present') elif ie_result.get('description') is None: self.to_screen(f'[info] There\'s no {label} description to write') return False else: try: self.to_screen(f'[info] Writing {label} description to: {descfn}') with open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile: descfile.write(ie_result['description']) except OSError: self.report_error(f'Cannot write {label} description file {descfn}') return None return True def _write_subtitles(self, info_dict, filename): """ Write subtitles to file and return list of (sub_filename, final_sub_filename); or None if error""" ret = [] subtitles = info_dict.get('requested_subtitles') if not (self.params.get('writesubtitles') or self.params.get('writeautomaticsub')): # subtitles download errors are already managed as troubles in relevant IE # that way it will silently go on when used with unsupporting IE return ret elif not subtitles: self.to_screen('[info] There are no subtitles for the requested languages') return ret sub_filename_base = self.prepare_filename(info_dict, 'subtitle') if not sub_filename_base: self.to_screen('[info] Skipping writing video subtitles') return ret for sub_lang, sub_info in subtitles.items(): sub_format = sub_info['ext'] sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext')) sub_filename_final = subtitles_filename(sub_filename_base, sub_lang, sub_format, info_dict.get('ext')) existing_sub = self.existing_file((sub_filename_final, sub_filename)) if existing_sub: self.to_screen(f'[info] Video subtitle {sub_lang}.{sub_format} is already present') sub_info['filepath'] = existing_sub ret.append((existing_sub, sub_filename_final)) continue self.to_screen(f'[info] Writing video subtitles to: {sub_filename}') if sub_info.get('data') is not None: try: # Use newline='' to prevent conversion of newline characters # See https://github.com/ytdl-org/youtube-dl/issues/10268 with open(sub_filename, 'w', encoding='utf-8', newline='') as subfile: subfile.write(sub_info['data']) sub_info['filepath'] = sub_filename ret.append((sub_filename, sub_filename_final)) 
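# The inline subtitle data has been written out, so skip the
# URL-download branch below for this language.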
continue except OSError: self.report_error(f'Cannot write video subtitles file {sub_filename}') return None try: sub_copy = sub_info.copy() sub_copy.setdefault('http_headers', info_dict.get('http_headers')) self.dl(sub_filename, sub_copy, subtitle=True) sub_info['filepath'] = sub_filename ret.append((sub_filename, sub_filename_final)) except (DownloadError, ExtractorError, OSError, ValueError, *network_exceptions) as err: msg = f'Unable to download video subtitles for {sub_lang!r}: {err}' if self.params.get('ignoreerrors') is not True: # False or 'only_download' if not self.params.get('ignoreerrors'): self.report_error(msg) raise DownloadError(msg) self.report_warning(msg) return ret def _write_thumbnails(self, label, info_dict, filename, thumb_filename_base=None): """ Write thumbnails to file and return list of (thumb_filename, final_thumb_filename); or None if error """ write_all = self.params.get('write_all_thumbnails', False) thumbnails, ret = [], [] if write_all or self.params.get('writethumbnail', False): thumbnails = info_dict.get('thumbnails') or [] if not thumbnails: self.to_screen(f'[info] There are no {label} thumbnails to download') return ret multiple = write_all and len(thumbnails) > 1 if thumb_filename_base is None: thumb_filename_base = filename if thumbnails and not thumb_filename_base: self.write_debug(f'Skipping writing {label} thumbnail') return ret if thumbnails and not self._ensure_dir_exists(filename): return None for idx, t in list(enumerate(thumbnails))[::-1]: thumb_ext = (f'{t["id"]}.' if multiple else '') + determine_ext(t['url'], 'jpg') thumb_display_id = f'{label} thumbnail {t["id"]}' thumb_filename = replace_extension(filename, thumb_ext, info_dict.get('ext')) thumb_filename_final = replace_extension(thumb_filename_base, thumb_ext, info_dict.get('ext')) existing_thumb = self.existing_file((thumb_filename_final, thumb_filename)) if existing_thumb: self.to_screen('[info] {} is already present'.format(( thumb_display_id if multiple else f'{label} thumbnail').capitalize())) t['filepath'] = existing_thumb ret.append((existing_thumb, thumb_filename_final)) else: self.to_screen(f'[info] Downloading {thumb_display_id} ...') try: uf = self.urlopen(Request(t['url'], headers=t.get('http_headers', {}))) self.to_screen(f'[info] Writing {thumb_display_id} to: {thumb_filename}') with open(encodeFilename(thumb_filename), 'wb') as thumbf: shutil.copyfileobj(uf, thumbf) ret.append((thumb_filename, thumb_filename_final)) t['filepath'] = thumb_filename except network_exceptions as err: if isinstance(err, HTTPError) and err.status == 404: self.to_screen(f'[info] {thumb_display_id.title()} does not exist') else: self.report_warning(f'Unable to download {thumb_display_id}: {err}') thumbnails.pop(idx) if ret and not write_all: break return ret yt-dlp-2024.09.27/yt_dlp/__init__.py000066400000000000000000001362111467563447100167630ustar00rootroot00000000000000import sys if sys.version_info < (3, 8): raise ImportError( f'You are using an unsupported version of Python. 
yt-dlp-2024.09.27/yt_dlp/__init__.py

import sys

if sys.version_info < (3, 8):
    raise ImportError(
        f'You are using an unsupported version of Python. Only Python versions 3.8 and above are supported by yt-dlp')  # noqa: F541

__license__ = 'The Unlicense'

import collections
import getpass
import itertools
import optparse
import os
import re
import traceback

from .compat import compat_os_name
from .cookies import SUPPORTED_BROWSERS, SUPPORTED_KEYRINGS
from .downloader.external import get_external_downloader
from .extractor import list_extractor_classes
from .extractor.adobepass import MSO_INFO
from .networking.impersonate import ImpersonateTarget
from .options import parseOpts
from .postprocessor import (
    FFmpegExtractAudioPP, FFmpegMergerPP, FFmpegPostProcessor,
    FFmpegSubtitlesConvertorPP, FFmpegThumbnailsConvertorPP,
    FFmpegVideoConvertorPP, FFmpegVideoRemuxerPP,
    MetadataFromFieldPP, MetadataParserPP,
)
from .update import Updater
from .utils import (
    NO_DEFAULT, POSTPROCESS_WHEN, DateRange, DownloadCancelled, DownloadError,
    FormatSorter, GeoUtils, PlaylistEntries, SameFileError, decodeOption,
    download_range_func, expand_path, float_or_none, format_field, int_or_none,
    join_nonempty, match_filter_func, parse_bytes, parse_duration,
    preferredencoding, read_batch_urls, read_stdin, render_table, setproctitle,
    shell_quote, traverse_obj, variadic, write_string,
)
from .utils.networking import std_headers
from .utils._utils import _UnsafeExtensionError
from .YoutubeDL import YoutubeDL

_IN_CLI = False


def _exit(status=0, *args):
    for msg in args:
        sys.stderr.write(msg)
    raise SystemExit(status)


def get_urls(urls, batchfile, verbose):
    """ @param verbose -1: quiet, 0: normal, 1: verbose """
    batch_urls = []
    if batchfile is not None:
        try:
            batch_urls = read_batch_urls(
                read_stdin(None if verbose == -1 else 'URLs') if batchfile == '-'
                else open(expand_path(batchfile), encoding='utf-8', errors='ignore'))
            if verbose == 1:
                write_string('[debug] Batch file urls: ' + repr(batch_urls) + '\n')
        except OSError:
            _exit(f'ERROR: batch file {batchfile} could not be read')
    _enc = preferredencoding()
    return [
        url.strip().decode(_enc, 'ignore') if isinstance(url, bytes) else url.strip()
        for url in batch_urls + urls]


def print_extractor_information(opts, urls):
    out = ''
    if opts.list_extractors:
        # Importing GenericIE is currently slow since it imports YoutubeIE
        from .extractor.generic import GenericIE

        urls = dict.fromkeys(urls, False)
        for ie in list_extractor_classes(opts.age_limit):
            out += ie.IE_NAME + (' (CURRENTLY BROKEN)' if not ie.working() else '') + '\n'
            if ie == GenericIE:
                matched_urls = [url for url, matched in urls.items() if not matched]
            else:
                matched_urls = tuple(filter(ie.suitable, urls.keys()))
                urls.update(dict.fromkeys(matched_urls, True))
            out += ''.join(f'  {url}\n' for url in matched_urls)
    elif opts.list_extractor_descriptions:
        _SEARCHES = ('cute kittens', 'slithering pythons', 'falling cat', 'angry poodle',
                     'purple fish', 'running tortoise', 'sleeping bunny', 'burping cow')
        out = '\n'.join(
            ie.description(markdown=False, search_examples=_SEARCHES)
            for ie in list_extractor_classes(opts.age_limit) if ie.working() and ie.IE_DESC is not False)
    elif opts.ap_list_mso:
        out = 'Supported TV Providers:\n{}\n'.format(render_table(
            ['mso', 'mso name'],
            [[mso_id, mso_info['name']] for mso_id, mso_info in MSO_INFO.items()]))
    else:
        return False
    write_string(out, out=sys.stdout)
    return True


def set_compat_opts(opts):
    def _unused_compat_opt(name):
        if name not in opts.compat_opts:
            return False
        opts.compat_opts.discard(name)
        opts.compat_opts.update([f'*{name}'])
        return True

    def set_default_compat(compat_name, opt_name, default=True, remove_compat=True):
        attr = getattr(opts, opt_name)
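        # NOTE (illustrative comment, not part of the original source):
        # compat_opts is the parsed --compat-options value; for example
        # `--compat-options multistreams,filename` would trigger both the
        # 'multistreams' and 'filename' branches handled below.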
        if compat_name in opts.compat_opts:
            if attr is None:
                setattr(opts, opt_name, not default)
                return True
            else:
                if remove_compat:
                    _unused_compat_opt(compat_name)
                return False
        elif attr is None:
            setattr(opts, opt_name, default)
        return None

    set_default_compat('abort-on-error', 'ignoreerrors', 'only_download')
    set_default_compat('no-playlist-metafiles', 'allow_playlist_files')
    set_default_compat('no-clean-infojson', 'clean_infojson')
    if 'no-attach-info-json' in opts.compat_opts:
        if opts.embed_infojson:
            _unused_compat_opt('no-attach-info-json')
        else:
            opts.embed_infojson = False
    if 'format-sort' in opts.compat_opts:
        opts.format_sort.extend(FormatSorter.ytdl_default)
    _video_multistreams_set = set_default_compat('multistreams', 'allow_multiple_video_streams', False, remove_compat=False)
    _audio_multistreams_set = set_default_compat('multistreams', 'allow_multiple_audio_streams', False, remove_compat=False)
    if _video_multistreams_set is False and _audio_multistreams_set is False:
        _unused_compat_opt('multistreams')
    if 'filename' in opts.compat_opts:
        if opts.outtmpl.get('default') is None:
            opts.outtmpl.update({'default': '%(title)s-%(id)s.%(ext)s'})
        else:
            _unused_compat_opt('filename')


def validate_options(opts):
    def validate(cndn, name, value=None, msg=None):
        if cndn:
            return True
        raise ValueError((msg or 'invalid {name} "{value}" given').format(name=name, value=value))

    def validate_in(name, value, items, msg=None):
        return validate(value is None or value in items, name, value, msg)

    def validate_regex(name, value, regex):
        return validate(value is None or re.match(regex, value), name, value)

    def validate_positive(name, value, strict=False):
        return validate(value is None or value > 0 or (not strict and value == 0),
                        name, value, '{name} "{value}" must be positive' + ('' if strict else ' or 0'))

    def validate_minmax(min_val, max_val, min_name, max_name=None):
        if max_val is None or min_val is None or max_val >= min_val:
            return
        if not max_name:
            min_name, max_name = f'min {min_name}', f'max {min_name}'
        raise ValueError(f'{max_name} "{max_val}" must be greater than or equal to {min_name} "{min_val}"')

    # Usernames and passwords
    validate(sum(map(bool, (opts.usenetrc, opts.netrc_cmd, opts.username))) <= 1, '.netrc',
             msg='{name}, netrc command and username/password are mutually exclusive options')
    validate(opts.password is None or opts.username is not None, 'account username', msg='{name} missing')
    validate(opts.ap_password is None or opts.ap_username is not None,
             'TV Provider account username', msg='{name} missing')
    validate_in('TV Provider', opts.ap_mso, MSO_INFO,
                'Unsupported {name} "{value}", use --ap-list-mso to get a list of supported TV Providers')

    # Numbers
    validate_positive('autonumber start', opts.autonumber_start)
    validate_positive('autonumber size', opts.autonumber_size, True)
    validate_positive('concurrent fragments', opts.concurrent_fragment_downloads, True)
    validate_positive('playlist start', opts.playliststart, True)
    if opts.playlistend != -1:
        validate_minmax(opts.playliststart, opts.playlistend, 'playlist start', 'playlist end')

    # Time ranges
    validate_positive('subtitles sleep interval', opts.sleep_interval_subtitles)
    validate_positive('requests sleep interval', opts.sleep_interval_requests)
    validate_positive('sleep interval', opts.sleep_interval)
    validate_positive('max sleep interval', opts.max_sleep_interval)
    if opts.sleep_interval is None:
        validate(
            opts.max_sleep_interval is None, 'min sleep interval',
            msg='{name} must be specified; use --min-sleep-interval')
    elif opts.max_sleep_interval is None:
        opts.max_sleep_interval = opts.sleep_interval
    else:
        validate_minmax(opts.sleep_interval, opts.max_sleep_interval, 'sleep interval')

    if opts.wait_for_video is not None:
        min_wait, max_wait, *_ = map(parse_duration, [*opts.wait_for_video.split('-', 1), None])
        validate(min_wait is not None and not (max_wait is None and '-' in opts.wait_for_video),
                 'time range to wait for video', opts.wait_for_video)
        validate_minmax(min_wait, max_wait, 'time range to wait for video')
        opts.wait_for_video = (min_wait, max_wait)

    # Format sort
    for f in opts.format_sort:
        validate_regex('format sorting', f, FormatSorter.regex)

    # Postprocessor formats
    if opts.convertsubtitles == 'none':
        opts.convertsubtitles = None
    if opts.convertthumbnails == 'none':
        opts.convertthumbnails = None

    validate_regex('merge output format', opts.merge_output_format,
                   r'({0})(/({0}))*'.format('|'.join(map(re.escape, FFmpegMergerPP.SUPPORTED_EXTS))))
    validate_regex('audio format', opts.audioformat, FFmpegExtractAudioPP.FORMAT_RE)
    validate_in('subtitle format', opts.convertsubtitles, FFmpegSubtitlesConvertorPP.SUPPORTED_EXTS)
    validate_regex('thumbnail format', opts.convertthumbnails, FFmpegThumbnailsConvertorPP.FORMAT_RE)
    validate_regex('recode video format', opts.recodevideo, FFmpegVideoConvertorPP.FORMAT_RE)
    validate_regex('remux video format', opts.remuxvideo, FFmpegVideoRemuxerPP.FORMAT_RE)
    if opts.audioquality:
        opts.audioquality = opts.audioquality.strip('k').strip('K')
        # int_or_none prevents inf, nan
        validate_positive('audio quality', int_or_none(float_or_none(opts.audioquality), default=0))

    # Retries
    def parse_retries(name, value):
        if value is None:
            return None
        elif value in ('inf', 'infinite'):
            return float('inf')
        try:
            return int(value)
        except (TypeError, ValueError):
            validate(False, f'{name} retry count', value)

    opts.retries = parse_retries('download', opts.retries)
    opts.fragment_retries = parse_retries('fragment', opts.fragment_retries)
    opts.extractor_retries = parse_retries('extractor', opts.extractor_retries)
    opts.file_access_retries = parse_retries('file access', opts.file_access_retries)

    # Retry sleep function
    def parse_sleep_func(expr):
        NUMBER_RE = r'\d+(?:\.\d+)?'
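        # NOTE (illustrative comment, not part of the original source): with the
        # grammar below, typical --retry-sleep expressions evaluate as
        #     '5'           -> constant 5s (linear, step defaults to 0)
        #     'linear=1::2' -> 1s, 3s, 5s, ...
        #     'exp=1:20'    -> 1s, 2s, 4s, ... capped at 20s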
        op, start, limit, step, *_ = (*tuple(re.fullmatch(
            rf'(?:(linear|exp)=)?({NUMBER_RE})(?::({NUMBER_RE})?)?(?::({NUMBER_RE}))?',
            expr.strip()).groups()), None, None)

        if op == 'exp':
            return lambda n: min(float(start) * (float(step or 2) ** n), float(limit or 'inf'))
        else:
            default_step = start if op or limit else 0
            return lambda n: min(float(start) + float(step or default_step) * n, float(limit or 'inf'))

    for key, expr in opts.retry_sleep.items():
        if not expr:
            del opts.retry_sleep[key]
            continue
        try:
            opts.retry_sleep[key] = parse_sleep_func(expr)
        except AttributeError:
            raise ValueError(f'invalid {key} retry sleep expression {expr!r}')

    # Bytes
    def validate_bytes(name, value):
        if value is None:
            return None
        numeric_limit = parse_bytes(value)
        validate(numeric_limit is not None, 'rate limit', value)
        return numeric_limit

    opts.ratelimit = validate_bytes('rate limit', opts.ratelimit)
    opts.throttledratelimit = validate_bytes('throttled rate limit', opts.throttledratelimit)
    opts.min_filesize = validate_bytes('min filesize', opts.min_filesize)
    opts.max_filesize = validate_bytes('max filesize', opts.max_filesize)
    opts.buffersize = validate_bytes('buffer size', opts.buffersize)
    opts.http_chunk_size = validate_bytes('http chunk size', opts.http_chunk_size)

    # Output templates
    def validate_outtmpl(tmpl, msg):
        err = YoutubeDL.validate_outtmpl(tmpl)
        if err:
            raise ValueError(f'invalid {msg} "{tmpl}": {err}')

    for k, tmpl in opts.outtmpl.items():
        validate_outtmpl(tmpl, f'{k} output template')
    for type_, tmpl_list in opts.forceprint.items():
        for tmpl in tmpl_list:
            validate_outtmpl(tmpl, f'{type_} print template')
    for type_, tmpl_list in opts.print_to_file.items():
        for tmpl, file in tmpl_list:
            validate_outtmpl(tmpl, f'{type_} print to file template')
            validate_outtmpl(file, f'{type_} print to file filename')
    validate_outtmpl(opts.sponsorblock_chapter_title, 'SponsorBlock chapter title')
    for k, tmpl in opts.progress_template.items():
        k = f'{k[:-6]} console title' if '-title' in k else f'{k} progress'
        validate_outtmpl(tmpl, f'{k} template')

    outtmpl_default = opts.outtmpl.get('default')
    if outtmpl_default == '':
        opts.skip_download = None
        del opts.outtmpl['default']

    def parse_chapters(name, value, advanced=False):
        parse_timestamp = lambda x: float('inf') if x in ('inf', 'infinite') else parse_duration(x)
        TIMESTAMP_RE = r'''(?x)(?:
            (?P<start_sign>-?)(?P<start>[^-]+)
        )?\s*-\s*(?:
            (?P<end_sign>-?)(?P<end>[^-]+)
        )?'''

        chapters, ranges, from_url = [], [], False
        for regex in value or []:
            if advanced and regex == '*from-url':
                from_url = True
                continue
            elif not regex.startswith('*'):
                try:
                    chapters.append(re.compile(regex))
                except re.error as err:
                    raise ValueError(f'invalid {name} regex "{regex}" - {err}')
                continue

            for range_ in map(str.strip, regex[1:].split(',')):
                mobj = range_ != '-' and re.fullmatch(TIMESTAMP_RE, range_)
                dur = mobj and [parse_timestamp(mobj.group('start') or '0'),
                                parse_timestamp(mobj.group('end') or 'inf')]
                signs = mobj and (mobj.group('start_sign'), mobj.group('end_sign'))

                err = None
                if None in (dur or [None]):
                    err = 'Must be of the form "*start-end"'
                elif not advanced and any(signs):
                    err = 'Negative timestamps are not allowed'
                else:
                    dur[0] *= -1 if signs[0] else 1
                    dur[1] *= -1 if signs[1] else 1
                    if dur[1] == float('-inf'):
                        err = '"-inf" is not a valid end'
                if err:
                    raise ValueError(f'invalid {name} time range "{regex}". {err}')
                ranges.append(dur)

        return chapters, ranges, from_url

    opts.remove_chapters, opts.remove_ranges, _ = parse_chapters('--remove-chapters', opts.remove_chapters)
    opts.download_ranges = download_range_func(*parse_chapters('--download-sections', opts.download_ranges, True))

    # Cookies from browser
    if opts.cookiesfrombrowser:
        container = None
        mobj = re.fullmatch(r'''(?x)
            (?P<name>[^+:]+)
            (?:\s*\+\s*(?P<keyring>[^:]+))?
            (?:\s*:\s*(?!:)(?P<profile>.+?))?
            (?:\s*::\s*(?P<container>.+))?
        ''', opts.cookiesfrombrowser)
        if mobj is None:
            raise ValueError(f'invalid cookies from browser arguments: {opts.cookiesfrombrowser}')
        browser_name, keyring, profile, container = mobj.group('name', 'keyring', 'profile', 'container')
        browser_name = browser_name.lower()
        if browser_name not in SUPPORTED_BROWSERS:
            raise ValueError(f'unsupported browser specified for cookies: "{browser_name}". '
                             f'Supported browsers are: {", ".join(sorted(SUPPORTED_BROWSERS))}')
        if keyring is not None:
            keyring = keyring.upper()
            if keyring not in SUPPORTED_KEYRINGS:
                raise ValueError(f'unsupported keyring specified for cookies: "{keyring}". '
                                 f'Supported keyrings are: {", ".join(sorted(SUPPORTED_KEYRINGS))}')
        opts.cookiesfrombrowser = (browser_name, profile, keyring, container)

    if opts.impersonate is not None:
        opts.impersonate = ImpersonateTarget.from_str(opts.impersonate.lower())

    # MetadataParser
    def metadataparser_actions(f):
        if isinstance(f, str):
            cmd = f'--parse-metadata {shell_quote(f)}'
            try:
                actions = [MetadataFromFieldPP.to_action(f)]
            except Exception as err:
                raise ValueError(f'{cmd} is invalid; {err}')
        else:
            cmd = f'--replace-in-metadata {shell_quote(f)}'
            actions = ((MetadataParserPP.Actions.REPLACE, x, *f[1:]) for x in f[0].split(','))

        for action in actions:
            try:
                MetadataParserPP.validate_action(*action)
            except Exception as err:
                raise ValueError(f'{cmd} is invalid; {err}')
            yield action

    if opts.metafromtitle is not None:
        opts.parse_metadata.setdefault('pre_process', []).append(f'title:{opts.metafromtitle}')
    opts.parse_metadata = {
        k: list(itertools.chain(*map(metadataparser_actions, v)))
        for k, v in opts.parse_metadata.items()
    }

    # Other options
    if opts.playlist_items is not None:
        try:
            tuple(PlaylistEntries.parse_playlist_items(opts.playlist_items))
        except Exception as err:
            raise ValueError(f'Invalid playlist-items {opts.playlist_items!r}: {err}')

    opts.geo_bypass_country, opts.geo_bypass_ip_block = None, None
    if opts.geo_bypass.lower() not in ('default', 'never'):
        try:
            GeoUtils.random_ipv4(opts.geo_bypass)
        except Exception:
            raise ValueError(f'Unsupported --xff "{opts.geo_bypass}"')
        if len(opts.geo_bypass) == 2:
            opts.geo_bypass_country = opts.geo_bypass
        else:
            opts.geo_bypass_ip_block = opts.geo_bypass
    opts.geo_bypass = opts.geo_bypass.lower() != 'never'

    opts.match_filter = match_filter_func(opts.match_filter, opts.breaking_match_filter)

    if opts.download_archive is not None:
        opts.download_archive = expand_path(opts.download_archive)

    if opts.ffmpeg_location is not None:
        opts.ffmpeg_location = expand_path(opts.ffmpeg_location)

    if opts.user_agent is not None:
        opts.headers.setdefault('User-Agent', opts.user_agent)
    if opts.referer is not None:
        opts.headers.setdefault('Referer', opts.referer)

    if opts.no_sponsorblock:
        opts.sponsorblock_mark = opts.sponsorblock_remove = set()

    default_downloader = None
    for proto, path in opts.external_downloader.items():
        if path == 'native':
            continue
        ed = get_external_downloader(path)
        if ed is None:
            raise ValueError(
                f'No such {format_field(proto, None, "%s ", ignore="default")}external downloader "{path}"')
        elif ed and proto == 'default':
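            # NOTE (illustrative comment, not part of the original source):
            # opts.external_downloader is the parsed --downloader mapping of
            # protocol(s) -> downloader name, e.g. roughly
            # {'default': 'aria2c', 'dash,m3u8': 'native'}; a 'default' entry
            # reaching this branch becomes default_downloader.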
default_downloader = ed.get_basename() for policy in opts.color.values(): if policy not in ('always', 'auto', 'auto-tty', 'no_color', 'no_color-tty', 'never'): raise ValueError(f'"{policy}" is not a valid color policy') warnings, deprecation_warnings = [], [] # Common mistake: -f best if opts.format == 'best': warnings.append('.\n '.join(( '"-f best" selects the best pre-merged format which is often not the best option', 'To let yt-dlp download and merge the best available formats, simply do not pass any format selection', 'If you know what you are doing and want only the best pre-merged format, use "-f b" instead to suppress this warning'))) # --(postprocessor/downloader)-args without name def report_args_compat(name, value, key1, key2=None, where=None): if key1 in value and key2 not in value: warnings.append(f'{name.title()} arguments given without specifying name. ' f'The arguments will be given to {where or f"all {name}s"}') return True return False if report_args_compat('external downloader', opts.external_downloader_args, 'default', where=default_downloader) and default_downloader: # Compat with youtube-dl's behavior. See https://github.com/ytdl-org/youtube-dl/commit/49c5293014bc11ec8c009856cd63cffa6296c1e1 opts.external_downloader_args.setdefault(default_downloader, opts.external_downloader_args.pop('default')) if report_args_compat('post-processor', opts.postprocessor_args, 'default-compat', 'default'): opts.postprocessor_args['default'] = opts.postprocessor_args.pop('default-compat') opts.postprocessor_args.setdefault('sponskrub', []) def report_conflict(arg1, opt1, arg2='--allow-unplayable-formats', opt2='allow_unplayable_formats', val1=NO_DEFAULT, val2=NO_DEFAULT, default=False): if val2 is NO_DEFAULT: val2 = getattr(opts, opt2) if not val2: return if val1 is NO_DEFAULT: val1 = getattr(opts, opt1) if val1: warnings.append(f'{arg1} is ignored since {arg2} was given') setattr(opts, opt1, default) # Conflicting options report_conflict('--playlist-reverse', 'playlist_reverse', '--playlist-random', 'playlist_random') report_conflict('--playlist-reverse', 'playlist_reverse', '--lazy-playlist', 'lazy_playlist') report_conflict('--playlist-random', 'playlist_random', '--lazy-playlist', 'lazy_playlist') report_conflict('--dateafter', 'dateafter', '--date', 'date', default=None) report_conflict('--datebefore', 'datebefore', '--date', 'date', default=None) report_conflict('--exec-before-download', 'exec_before_dl_cmd', '"--exec before_dl:"', 'exec_cmd', val2=opts.exec_cmd.get('before_dl')) report_conflict('--id', 'useid', '--output', 'outtmpl', val2=opts.outtmpl.get('default')) report_conflict('--remux-video', 'remuxvideo', '--recode-video', 'recodevideo') report_conflict('--sponskrub', 'sponskrub', '--remove-chapters', 'remove_chapters') report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-mark', 'sponsorblock_mark') report_conflict('--sponskrub', 'sponskrub', '--sponsorblock-remove', 'sponsorblock_remove') report_conflict('--sponskrub-cut', 'sponskrub_cut', '--split-chapter', 'split_chapters', val1=opts.sponskrub and opts.sponskrub_cut) # Conflicts with --allow-unplayable-formats report_conflict('--embed-metadata', 'addmetadata') report_conflict('--embed-chapters', 'addchapters') report_conflict('--embed-info-json', 'embed_infojson') report_conflict('--embed-subs', 'embedsubtitles') report_conflict('--embed-thumbnail', 'embedthumbnail') report_conflict('--extract-audio', 'extractaudio') report_conflict('--fixup', 'fixup', val1=opts.fixup not in (None, 'never', 'ignore'), 
default='never') report_conflict('--recode-video', 'recodevideo') report_conflict('--remove-chapters', 'remove_chapters', default=[]) report_conflict('--remux-video', 'remuxvideo') report_conflict('--sponskrub', 'sponskrub') report_conflict('--sponsorblock-remove', 'sponsorblock_remove', default=set()) report_conflict('--xattrs', 'xattrs') # Fully deprecated options def report_deprecation(val, old, new=None): if not val: return deprecation_warnings.append( f'{old} is deprecated and may be removed in a future version. Use {new} instead' if new else f'{old} is deprecated and may not work as expected') report_deprecation(opts.sponskrub, '--sponskrub', '--sponsorblock-mark or --sponsorblock-remove') report_deprecation(not opts.prefer_ffmpeg, '--prefer-avconv', 'ffmpeg') # report_deprecation(opts.include_ads, '--include-ads') # We may re-implement this in future # report_deprecation(opts.call_home, '--call-home') # We may re-implement this in future # report_deprecation(opts.writeannotations, '--write-annotations') # It's just that no website has it # Dependent options opts.date = DateRange.day(opts.date) if opts.date else DateRange(opts.dateafter, opts.datebefore) if opts.exec_before_dl_cmd: opts.exec_cmd['before_dl'] = opts.exec_before_dl_cmd if opts.useid: # --id is not deprecated in youtube-dl opts.outtmpl['default'] = '%(id)s.%(ext)s' if opts.overwrites: # --force-overwrites implies --no-continue opts.continue_dl = False if (opts.addmetadata or opts.sponsorblock_mark) and opts.addchapters is None: # Add chapters when adding metadata or marking sponsors opts.addchapters = True if opts.extractaudio and not opts.keepvideo and opts.format is None: # Do not unnecessarily download audio opts.format = 'bestaudio/best' if opts.getcomments and opts.writeinfojson is None and not opts.embed_infojson: # If JSON is not printed anywhere, but comments are requested, save it to file if not opts.dumpjson or opts.print_json or opts.dump_single_json: opts.writeinfojson = True if opts.allsubtitles and not (opts.embedsubtitles or opts.writeautomaticsub): # --all-sub automatically sets --write-sub if --write-auto-sub is not given opts.writesubtitles = True if opts.addmetadata and opts.embed_infojson is None: # If embedding metadata and infojson is present, embed it opts.embed_infojson = 'if_exists' # Ask for passwords if opts.username is not None and opts.password is None: opts.password = getpass.getpass('Type account password and press [Return]: ') if opts.ap_username is not None and opts.ap_password is None: opts.ap_password = getpass.getpass('Type TV provider account password and press [Return]: ') # compat option changes global state destructively; only allow from cli if 'allow-unsafe-ext' in opts.compat_opts: warnings.append( 'Using allow-unsafe-ext opens you up to potential attacks. 
' 'Use with great care!') _UnsafeExtensionError.sanitize_extension = lambda x, prepend=False: x return warnings, deprecation_warnings def get_postprocessors(opts): yield from opts.add_postprocessors for when, actions in opts.parse_metadata.items(): yield { 'key': 'MetadataParser', 'actions': actions, 'when': when, } sponsorblock_query = opts.sponsorblock_mark | opts.sponsorblock_remove if sponsorblock_query: yield { 'key': 'SponsorBlock', 'categories': sponsorblock_query, 'api': opts.sponsorblock_api, 'when': 'after_filter', } if opts.convertsubtitles: yield { 'key': 'FFmpegSubtitlesConvertor', 'format': opts.convertsubtitles, 'when': 'before_dl', } if opts.convertthumbnails: yield { 'key': 'FFmpegThumbnailsConvertor', 'format': opts.convertthumbnails, 'when': 'before_dl', } if opts.extractaudio: yield { 'key': 'FFmpegExtractAudio', 'preferredcodec': opts.audioformat, 'preferredquality': opts.audioquality, 'nopostoverwrites': opts.nopostoverwrites, } if opts.remuxvideo: yield { 'key': 'FFmpegVideoRemuxer', 'preferedformat': opts.remuxvideo, } if opts.recodevideo: yield { 'key': 'FFmpegVideoConvertor', 'preferedformat': opts.recodevideo, } # If ModifyChapters is going to remove chapters, subtitles must already be in the container. if opts.embedsubtitles: keep_subs = 'no-keep-subs' not in opts.compat_opts yield { 'key': 'FFmpegEmbedSubtitle', # already_have_subtitle = True prevents the file from being deleted after embedding 'already_have_subtitle': opts.writesubtitles and keep_subs, } if not opts.writeautomaticsub and keep_subs: opts.writesubtitles = True # ModifyChapters must run before FFmpegMetadataPP if opts.remove_chapters or sponsorblock_query: yield { 'key': 'ModifyChapters', 'remove_chapters_patterns': opts.remove_chapters, 'remove_sponsor_segments': opts.sponsorblock_remove, 'remove_ranges': opts.remove_ranges, 'sponsorblock_chapter_title': opts.sponsorblock_chapter_title, 'force_keyframes': opts.force_keyframes_at_cuts, } # FFmpegMetadataPP should be run after FFmpegVideoConvertorPP and # FFmpegExtractAudioPP as containers before conversion may not support # metadata (3gp, webm, etc.) # By default ffmpeg preserves metadata applicable for both # source and target containers. From this point the container won't change, # so metadata can be added here. 
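    # NOTE (illustrative comment, not part of the original source): every dict
    # yielded here follows one convention: 'key' names the postprocessor class
    # minus the "PP" suffix (e.g. 'FFmpegMetadata' -> FFmpegMetadataPP) and the
    # optional 'when' picks the pipeline stage (a member of POSTPROCESS_WHEN,
    # such as 'pre_process', 'before_dl', 'post_process' or 'playlist').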
if opts.addmetadata or opts.addchapters or opts.embed_infojson: yield { 'key': 'FFmpegMetadata', 'add_chapters': opts.addchapters, 'add_metadata': opts.addmetadata, 'add_infojson': opts.embed_infojson, } # Deprecated # This should be above EmbedThumbnail since sponskrub removes the thumbnail attachment # but must be below EmbedSubtitle and FFmpegMetadata # See https://github.com/yt-dlp/yt-dlp/issues/204 , https://github.com/faissaloo/SponSkrub/issues/29 # If opts.sponskrub is None, sponskrub is used, but it silently fails if the executable can't be found if opts.sponskrub is not False: yield { 'key': 'SponSkrub', 'path': opts.sponskrub_path, 'args': opts.sponskrub_args, 'cut': opts.sponskrub_cut, 'force': opts.sponskrub_force, 'ignoreerror': opts.sponskrub is None, '_from_cli': True, } if opts.embedthumbnail: yield { 'key': 'EmbedThumbnail', # already_have_thumbnail = True prevents the file from being deleted after embedding 'already_have_thumbnail': opts.writethumbnail, } if not opts.writethumbnail: opts.writethumbnail = True opts.outtmpl['pl_thumbnail'] = '' if opts.split_chapters: yield { 'key': 'FFmpegSplitChapters', 'force_keyframes': opts.force_keyframes_at_cuts, } # XAttrMetadataPP should be run after post-processors that may change file contents if opts.xattrs: yield {'key': 'XAttrMetadata'} if opts.concat_playlist != 'never': yield { 'key': 'FFmpegConcat', 'only_multi_video': opts.concat_playlist != 'always', 'when': 'playlist', } # Exec must be the last PP of each category for when, exec_cmd in opts.exec_cmd.items(): yield { 'key': 'Exec', 'exec_cmd': exec_cmd, 'when': when, } ParsedOptions = collections.namedtuple('ParsedOptions', ('parser', 'options', 'urls', 'ydl_opts')) def parse_options(argv=None): """@returns ParsedOptions(parser, opts, urls, ydl_opts)""" parser, opts, urls = parseOpts(argv) urls = get_urls(urls, opts.batchfile, -1 if opts.quiet and not opts.verbose else opts.verbose) set_compat_opts(opts) try: warnings, deprecation_warnings = validate_options(opts) except ValueError as err: parser.error(f'{err}\n') postprocessors = list(get_postprocessors(opts)) print_only = bool(opts.forceprint) and all(k not in opts.forceprint for k in POSTPROCESS_WHEN[3:]) any_getting = any(getattr(opts, k) for k in ( 'dumpjson', 'dump_single_json', 'getdescription', 'getduration', 'getfilename', 'getformat', 'getid', 'getthumbnail', 'gettitle', 'geturl', )) if opts.quiet is None: opts.quiet = any_getting or opts.print_json or bool(opts.forceprint) playlist_pps = [pp for pp in postprocessors if pp.get('when') == 'playlist'] write_playlist_infojson = (opts.writeinfojson and not opts.clean_infojson and opts.allow_playlist_files and opts.outtmpl.get('pl_infojson') != '') if not any(( opts.extract_flat, opts.dump_single_json, opts.forceprint.get('playlist'), opts.print_to_file.get('playlist'), write_playlist_infojson, )): if not playlist_pps: opts.extract_flat = 'discard' elif playlist_pps == [{'key': 'FFmpegConcat', 'only_multi_video': True, 'when': 'playlist'}]: opts.extract_flat = 'discard_in_playlist' final_ext = ( opts.recodevideo if opts.recodevideo in FFmpegVideoConvertorPP.SUPPORTED_EXTS else opts.remuxvideo if opts.remuxvideo in FFmpegVideoRemuxerPP.SUPPORTED_EXTS else opts.audioformat if (opts.extractaudio and opts.audioformat in FFmpegExtractAudioPP.SUPPORTED_EXTS) else None) return ParsedOptions(parser, opts, urls, { 'usenetrc': opts.usenetrc, 'netrc_location': opts.netrc_location, 'netrc_cmd': opts.netrc_cmd, 'username': opts.username, 'password': opts.password, 'twofactor': 
opts.twofactor, 'videopassword': opts.videopassword, 'ap_mso': opts.ap_mso, 'ap_username': opts.ap_username, 'ap_password': opts.ap_password, 'client_certificate': opts.client_certificate, 'client_certificate_key': opts.client_certificate_key, 'client_certificate_password': opts.client_certificate_password, 'quiet': opts.quiet, 'no_warnings': opts.no_warnings, 'forceurl': opts.geturl, 'forcetitle': opts.gettitle, 'forceid': opts.getid, 'forcethumbnail': opts.getthumbnail, 'forcedescription': opts.getdescription, 'forceduration': opts.getduration, 'forcefilename': opts.getfilename, 'forceformat': opts.getformat, 'forceprint': opts.forceprint, 'print_to_file': opts.print_to_file, 'forcejson': opts.dumpjson or opts.print_json, 'dump_single_json': opts.dump_single_json, 'force_write_download_archive': opts.force_write_download_archive, 'simulate': (print_only or any_getting or None) if opts.simulate is None else opts.simulate, 'skip_download': opts.skip_download, 'format': opts.format, 'allow_unplayable_formats': opts.allow_unplayable_formats, 'ignore_no_formats_error': opts.ignore_no_formats_error, 'format_sort': opts.format_sort, 'format_sort_force': opts.format_sort_force, 'allow_multiple_video_streams': opts.allow_multiple_video_streams, 'allow_multiple_audio_streams': opts.allow_multiple_audio_streams, 'check_formats': opts.check_formats, 'listformats': opts.listformats, 'listformats_table': opts.listformats_table, 'outtmpl': opts.outtmpl, 'outtmpl_na_placeholder': opts.outtmpl_na_placeholder, 'paths': opts.paths, 'autonumber_size': opts.autonumber_size, 'autonumber_start': opts.autonumber_start, 'restrictfilenames': opts.restrictfilenames, 'windowsfilenames': opts.windowsfilenames, 'ignoreerrors': opts.ignoreerrors, 'force_generic_extractor': opts.force_generic_extractor, 'allowed_extractors': opts.allowed_extractors or ['default'], 'ratelimit': opts.ratelimit, 'throttledratelimit': opts.throttledratelimit, 'overwrites': opts.overwrites, 'retries': opts.retries, 'file_access_retries': opts.file_access_retries, 'fragment_retries': opts.fragment_retries, 'extractor_retries': opts.extractor_retries, 'retry_sleep_functions': opts.retry_sleep, 'skip_unavailable_fragments': opts.skip_unavailable_fragments, 'keep_fragments': opts.keep_fragments, 'concurrent_fragment_downloads': opts.concurrent_fragment_downloads, 'buffersize': opts.buffersize, 'noresizebuffer': opts.noresizebuffer, 'http_chunk_size': opts.http_chunk_size, 'continuedl': opts.continue_dl, 'noprogress': opts.quiet if opts.noprogress is None else opts.noprogress, 'progress_with_newline': opts.progress_with_newline, 'progress_template': opts.progress_template, 'progress_delta': opts.progress_delta, 'playliststart': opts.playliststart, 'playlistend': opts.playlistend, 'playlistreverse': opts.playlist_reverse, 'playlistrandom': opts.playlist_random, 'lazy_playlist': opts.lazy_playlist, 'noplaylist': opts.noplaylist, 'logtostderr': opts.outtmpl.get('default') == '-', 'consoletitle': opts.consoletitle, 'nopart': opts.nopart, 'updatetime': opts.updatetime, 'writedescription': opts.writedescription, 'writeannotations': opts.writeannotations, 'writeinfojson': opts.writeinfojson, 'allow_playlist_files': opts.allow_playlist_files, 'clean_infojson': opts.clean_infojson, 'getcomments': opts.getcomments, 'writethumbnail': opts.writethumbnail is True, 'write_all_thumbnails': opts.writethumbnail == 'all', 'writelink': opts.writelink, 'writeurllink': opts.writeurllink, 'writewebloclink': opts.writewebloclink, 'writedesktoplink': 
opts.writedesktoplink, 'writesubtitles': opts.writesubtitles, 'writeautomaticsub': opts.writeautomaticsub, 'allsubtitles': opts.allsubtitles, 'listsubtitles': opts.listsubtitles, 'subtitlesformat': opts.subtitlesformat, 'subtitleslangs': opts.subtitleslangs, 'matchtitle': decodeOption(opts.matchtitle), 'rejecttitle': decodeOption(opts.rejecttitle), 'max_downloads': opts.max_downloads, 'prefer_free_formats': opts.prefer_free_formats, 'trim_file_name': opts.trim_file_name, 'verbose': opts.verbose, 'dump_intermediate_pages': opts.dump_intermediate_pages, 'write_pages': opts.write_pages, 'load_pages': opts.load_pages, 'test': opts.test, 'keepvideo': opts.keepvideo, 'min_filesize': opts.min_filesize, 'max_filesize': opts.max_filesize, 'min_views': opts.min_views, 'max_views': opts.max_views, 'daterange': opts.date, 'cachedir': opts.cachedir, 'youtube_print_sig_code': opts.youtube_print_sig_code, 'age_limit': opts.age_limit, 'download_archive': opts.download_archive, 'break_on_existing': opts.break_on_existing, 'break_on_reject': opts.break_on_reject, 'break_per_url': opts.break_per_url, 'skip_playlist_after_errors': opts.skip_playlist_after_errors, 'cookiefile': opts.cookiefile, 'cookiesfrombrowser': opts.cookiesfrombrowser, 'legacyserverconnect': opts.legacy_server_connect, 'nocheckcertificate': opts.no_check_certificate, 'prefer_insecure': opts.prefer_insecure, 'enable_file_urls': opts.enable_file_urls, 'http_headers': opts.headers, 'proxy': opts.proxy, 'socket_timeout': opts.socket_timeout, 'bidi_workaround': opts.bidi_workaround, 'debug_printtraffic': opts.debug_printtraffic, 'prefer_ffmpeg': opts.prefer_ffmpeg, 'include_ads': opts.include_ads, 'default_search': opts.default_search, 'dynamic_mpd': opts.dynamic_mpd, 'extractor_args': opts.extractor_args, 'youtube_include_dash_manifest': opts.youtube_include_dash_manifest, 'youtube_include_hls_manifest': opts.youtube_include_hls_manifest, 'encoding': opts.encoding, 'extract_flat': opts.extract_flat, 'live_from_start': opts.live_from_start, 'wait_for_video': opts.wait_for_video, 'mark_watched': opts.mark_watched, 'merge_output_format': opts.merge_output_format, 'final_ext': final_ext, 'postprocessors': postprocessors, 'fixup': opts.fixup, 'source_address': opts.source_address, 'impersonate': opts.impersonate, 'call_home': opts.call_home, 'sleep_interval_requests': opts.sleep_interval_requests, 'sleep_interval': opts.sleep_interval, 'max_sleep_interval': opts.max_sleep_interval, 'sleep_interval_subtitles': opts.sleep_interval_subtitles, 'external_downloader': opts.external_downloader, 'download_ranges': opts.download_ranges, 'force_keyframes_at_cuts': opts.force_keyframes_at_cuts, 'list_thumbnails': opts.list_thumbnails, 'playlist_items': opts.playlist_items, 'xattr_set_filesize': opts.xattr_set_filesize, 'match_filter': opts.match_filter, 'color': opts.color, 'ffmpeg_location': opts.ffmpeg_location, 'hls_prefer_native': opts.hls_prefer_native, 'hls_use_mpegts': opts.hls_use_mpegts, 'hls_split_discontinuity': opts.hls_split_discontinuity, 'external_downloader_args': opts.external_downloader_args, 'postprocessor_args': opts.postprocessor_args, 'cn_verification_proxy': opts.cn_verification_proxy, 'geo_verification_proxy': opts.geo_verification_proxy, 'geo_bypass': opts.geo_bypass, 'geo_bypass_country': opts.geo_bypass_country, 'geo_bypass_ip_block': opts.geo_bypass_ip_block, '_warnings': warnings, '_deprecation_warnings': deprecation_warnings, 'compat_opts': opts.compat_opts, }) def _real_main(argv=None): setproctitle('yt-dlp') parser, opts, 
all_urls, ydl_opts = parse_options(argv) # Dump user agent if opts.dump_user_agent: ua = traverse_obj(opts.headers, 'User-Agent', casesense=False, default=std_headers['User-Agent']) write_string(f'{ua}\n', out=sys.stdout) return if print_extractor_information(opts, all_urls): return # We may need ffmpeg_location without having access to the YoutubeDL instance # See https://github.com/yt-dlp/yt-dlp/issues/2191 if opts.ffmpeg_location: FFmpegPostProcessor._ffmpeg_location.set(opts.ffmpeg_location) with YoutubeDL(ydl_opts) as ydl: pre_process = opts.update_self or opts.rm_cachedir actual_use = all_urls or opts.load_info_filename if opts.rm_cachedir: ydl.cache.remove() try: updater = Updater(ydl, opts.update_self) if opts.update_self and updater.update() and actual_use: if updater.cmd: return updater.restart() # This code is reachable only for zip variant in py < 3.10 # It makes sense to exit here, but the old behavior is to continue ydl.report_warning('Restart yt-dlp to use the updated version') # return 100, 'ERROR: The program must exit for the update to complete' except Exception: traceback.print_exc() ydl._download_retcode = 100 if opts.list_impersonate_targets: known_targets = [ # List of simplified targets we know are supported, # to help users know what dependencies may be required. (ImpersonateTarget('chrome'), 'curl_cffi'), (ImpersonateTarget('edge'), 'curl_cffi'), (ImpersonateTarget('safari'), 'curl_cffi'), ] available_targets = ydl._get_available_impersonate_targets() def make_row(target, handler): return [ join_nonempty(target.client.title(), target.version, delim='-') or '-', join_nonempty((target.os or '').title(), target.os_version, delim='-') or '-', handler, ] rows = [make_row(target, handler) for target, handler in available_targets] for known_target, known_handler in known_targets: if not any( known_target in target and handler == known_handler for target, handler in available_targets ): rows.append([ ydl._format_out(text, ydl.Styles.SUPPRESS) for text in make_row(known_target, f'{known_handler} (not available)') ]) ydl.to_screen('[info] Available impersonate targets') ydl.to_stdout(render_table(['Client', 'OS', 'Source'], rows, extra_gap=2, delim='-')) return if not actual_use: if pre_process: return ydl._download_retcode args = sys.argv[1:] if argv is None else argv ydl.warn_if_short_id(args) # Show a useful error message and wait for keypress if not launched from shell on Windows if not args and compat_os_name == 'nt' and getattr(sys, 'frozen', False): import ctypes.wintypes import msvcrt kernel32 = ctypes.WinDLL('Kernel32') buffer = (1 * ctypes.wintypes.DWORD)() attached_processes = kernel32.GetConsoleProcessList(buffer, 1) # If we only have a single process attached, then the executable was double clicked # When using `pyinstaller` with `--onefile`, two processes get attached is_onefile = hasattr(sys, '_MEIPASS') and os.path.basename(sys._MEIPASS).startswith('_MEI') if attached_processes == 1 or is_onefile and attached_processes == 2: print(parser._generate_error_message( 'Do not double-click the executable, instead call it from a command line.\n' 'Please read the README for further information on how to use yt-dlp: ' 'https://github.com/yt-dlp/yt-dlp#readme')) msvcrt.getch() _exit(2) parser.error( 'You must provide at least one URL.\n' 'Type yt-dlp --help to see a list of all options.') parser.destroy() try: if opts.load_info_filename is not None: if all_urls: ydl.report_warning('URLs are ignored due to --load-info-json') return 
ydl.download_with_info_file(expand_path(opts.load_info_filename)) else: return ydl.download(all_urls) except DownloadCancelled: ydl.to_screen('Aborting remaining downloads') return 101 def main(argv=None): global _IN_CLI _IN_CLI = True try: _exit(*variadic(_real_main(argv))) except DownloadError: _exit(1) except SameFileError as e: _exit(f'ERROR: {e}') except KeyboardInterrupt: _exit('\nERROR: Interrupted by user') except BrokenPipeError as e: # https://docs.python.org/3/library/signal.html#note-on-sigpipe devnull = os.open(os.devnull, os.O_WRONLY) os.dup2(devnull, sys.stdout.fileno()) _exit(f'\nERROR: {e}') except optparse.OptParseError as e: _exit(2, f'\n{e}') from .extractor import gen_extractors, list_extractors __all__ = [ 'main', 'YoutubeDL', 'parse_options', 'gen_extractors', 'list_extractors', ] yt-dlp-2024.09.27/yt_dlp/__main__.py000066400000000000000000000005571467563447100167470ustar00rootroot00000000000000#!/usr/bin/env python3 # Execute with # $ python3 -m yt_dlp import sys if __package__ is None and not getattr(sys, 'frozen', False): # direct call of __main__.py import os.path path = os.path.realpath(os.path.abspath(__file__)) sys.path.insert(0, os.path.dirname(os.path.dirname(path))) import yt_dlp if __name__ == '__main__': yt_dlp.main() yt-dlp-2024.09.27/yt_dlp/__pyinstaller/000077500000000000000000000000001467563447100175125ustar00rootroot00000000000000yt-dlp-2024.09.27/yt_dlp/__pyinstaller/__init__.py000066400000000000000000000001101467563447100216130ustar00rootroot00000000000000import os def get_hook_dirs(): return [os.path.dirname(__file__)] yt-dlp-2024.09.27/yt_dlp/__pyinstaller/hook-yt_dlp.py000066400000000000000000000023731467563447100223220ustar00rootroot00000000000000import sys from PyInstaller.utils.hooks import collect_submodules, collect_data_files def pycryptodome_module(): try: import Cryptodome # noqa: F401 except ImportError: try: import Crypto # noqa: F401 print('WARNING: Using Crypto since Cryptodome is not available. 
' 'Install with: python3 -m pip install pycryptodomex', file=sys.stderr) return 'Crypto' except ImportError: pass return 'Cryptodome' def get_hidden_imports(): yield from ('yt_dlp.compat._legacy', 'yt_dlp.compat._deprecated') yield from ('yt_dlp.utils._legacy', 'yt_dlp.utils._deprecated') yield pycryptodome_module() # Only `websockets` is required, others are collected just in case for module in ('websockets', 'requests', 'urllib3'): yield from collect_submodules(module) # These are auto-detected, but explicitly add them just in case yield from ('mutagen', 'brotli', 'certifi', 'secretstorage', 'curl_cffi') hiddenimports = list(get_hidden_imports()) print(f'Adding imports: {hiddenimports}') excludedimports = ['youtube_dl', 'youtube_dlc', 'test', 'ytdlp_plugins', 'devscripts', 'bundle'] datas = collect_data_files('curl_cffi', includes=['cacert.pem']) yt-dlp-2024.09.27/yt_dlp/aes.py000066400000000000000000000533231467563447100157760ustar00rootroot00000000000000import base64 from math import ceil from .compat import compat_ord from .dependencies import Cryptodome from .utils import bytes_to_intlist, intlist_to_bytes if Cryptodome.AES: def aes_cbc_decrypt_bytes(data, key, iv): """ Decrypt bytes with AES-CBC using pycryptodome """ return Cryptodome.AES.new(key, Cryptodome.AES.MODE_CBC, iv).decrypt(data) def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): """ Decrypt bytes with AES-GCM using pycryptodome """ return Cryptodome.AES.new(key, Cryptodome.AES.MODE_GCM, nonce).decrypt_and_verify(data, tag) else: def aes_cbc_decrypt_bytes(data, key, iv): """ Decrypt bytes with AES-CBC using native implementation since pycryptodome is unavailable """ return intlist_to_bytes(aes_cbc_decrypt(*map(bytes_to_intlist, (data, key, iv)))) def aes_gcm_decrypt_and_verify_bytes(data, key, tag, nonce): """ Decrypt bytes with AES-GCM using native implementation since pycryptodome is unavailable """ return intlist_to_bytes(aes_gcm_decrypt_and_verify(*map(bytes_to_intlist, (data, key, tag, nonce)))) def aes_cbc_encrypt_bytes(data, key, iv, **kwargs): return intlist_to_bytes(aes_cbc_encrypt(*map(bytes_to_intlist, (data, key, iv)), **kwargs)) BLOCK_SIZE_BYTES = 16 def unpad_pkcs7(data): return data[:-compat_ord(data[-1])] def pkcs7_padding(data): """ PKCS#7 padding @param {int[]} data cleartext @returns {int[]} padding data """ remaining_length = BLOCK_SIZE_BYTES - len(data) % BLOCK_SIZE_BYTES return data + [remaining_length] * remaining_length def pad_block(block, padding_mode): """ Pad a block with the given padding mode @param {int[]} block block to pad @param padding_mode padding mode """ padding_size = BLOCK_SIZE_BYTES - len(block) PADDING_BYTE = { 'pkcs7': padding_size, 'iso7816': 0x0, 'whitespace': 0x20, 'zero': 0x0, } if padding_size < 0: raise ValueError('Block size exceeded') elif padding_mode not in PADDING_BYTE: raise NotImplementedError(f'Padding mode {padding_mode} is not implemented') if padding_mode == 'iso7816' and padding_size: block = [*block, 0x80] # NB: += mutates list padding_size -= 1 return block + [PADDING_BYTE[padding_mode]] * padding_size def aes_ecb_encrypt(data, key, iv=None): """ Encrypt with aes in ECB mode. 
Using PKCS#7 padding @param {int[]} data cleartext @param {int[]} key 16/24/32-Byte cipher key @param {int[]} iv Unused for this mode @returns {int[]} encrypted data """ expanded_key = key_expansion(key) block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) encrypted_data = [] for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] encrypted_data += aes_encrypt(pkcs7_padding(block), expanded_key) return encrypted_data def aes_ecb_decrypt(data, key, iv=None): """ Decrypt with aes in ECB mode @param {int[]} data cleartext @param {int[]} key 16/24/32-Byte cipher key @param {int[]} iv Unused for this mode @returns {int[]} decrypted data """ expanded_key = key_expansion(key) block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) encrypted_data = [] for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] encrypted_data += aes_decrypt(block, expanded_key) return encrypted_data[:len(data)] def aes_ctr_decrypt(data, key, iv): """ Decrypt with aes in counter mode @param {int[]} data cipher @param {int[]} key 16/24/32-Byte cipher key @param {int[]} iv 16-Byte initialization vector @returns {int[]} decrypted data """ return aes_ctr_encrypt(data, key, iv) def aes_ctr_encrypt(data, key, iv): """ Encrypt with aes in counter mode @param {int[]} data cleartext @param {int[]} key 16/24/32-Byte cipher key @param {int[]} iv 16-Byte initialization vector @returns {int[]} encrypted data """ expanded_key = key_expansion(key) block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) counter = iter_vector(iv) encrypted_data = [] for i in range(block_count): counter_block = next(counter) block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] block += [0] * (BLOCK_SIZE_BYTES - len(block)) cipher_counter_block = aes_encrypt(counter_block, expanded_key) encrypted_data += xor(block, cipher_counter_block) return encrypted_data[:len(data)] def aes_cbc_decrypt(data, key, iv): """ Decrypt with aes in CBC mode @param {int[]} data cipher @param {int[]} key 16/24/32-Byte cipher key @param {int[]} iv 16-Byte IV @returns {int[]} decrypted data """ expanded_key = key_expansion(key) block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) decrypted_data = [] previous_cipher_block = iv for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] block += [0] * (BLOCK_SIZE_BYTES - len(block)) decrypted_block = aes_decrypt(block, expanded_key) decrypted_data += xor(decrypted_block, previous_cipher_block) previous_cipher_block = block return decrypted_data[:len(data)] def aes_cbc_encrypt(data, key, iv, *, padding_mode='pkcs7'): """ Encrypt with aes in CBC mode @param {int[]} data cleartext @param {int[]} key 16/24/32-Byte cipher key @param {int[]} iv 16-Byte IV @param padding_mode Padding mode to use @returns {int[]} encrypted data """ expanded_key = key_expansion(key) block_count = int(ceil(float(len(data)) / BLOCK_SIZE_BYTES)) encrypted_data = [] previous_cipher_block = iv for i in range(block_count): block = data[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES] block = pad_block(block, padding_mode) mixed_block = xor(block, previous_cipher_block) encrypted_block = aes_encrypt(mixed_block, expanded_key) encrypted_data += encrypted_block previous_cipher_block = encrypted_block return encrypted_data def aes_gcm_decrypt_and_verify(data, key, tag, nonce): """ Decrypt with aes in GBM mode and checks authenticity using tag @param {int[]} data cipher @param {int[]} key 16-Byte cipher key @param {int[]} tag 
authentication tag @param {int[]} nonce IV (recommended 12-Byte) @returns {int[]} decrypted data """ # XXX: check aes, gcm param hash_subkey = aes_encrypt([0] * BLOCK_SIZE_BYTES, key_expansion(key)) if len(nonce) == 12: j0 = [*nonce, 0, 0, 0, 1] else: fill = (BLOCK_SIZE_BYTES - (len(nonce) % BLOCK_SIZE_BYTES)) % BLOCK_SIZE_BYTES + 8 ghash_in = nonce + [0] * fill + bytes_to_intlist((8 * len(nonce)).to_bytes(8, 'big')) j0 = ghash(hash_subkey, ghash_in) # TODO: add nonce support to aes_ctr_decrypt # nonce_ctr = j0[:12] iv_ctr = inc(j0) decrypted_data = aes_ctr_decrypt(data, key, iv_ctr + [0] * (BLOCK_SIZE_BYTES - len(iv_ctr))) pad_len = len(data) // 16 * 16 s_tag = ghash( hash_subkey, data + [0] * (BLOCK_SIZE_BYTES - len(data) + pad_len) # pad + bytes_to_intlist((0 * 8).to_bytes(8, 'big') # length of associated data + ((len(data) * 8).to_bytes(8, 'big'))), # length of data ) if tag != aes_ctr_encrypt(s_tag, key, j0): raise ValueError('Mismatching authentication tag') return decrypted_data def aes_encrypt(data, expanded_key): """ Encrypt one block with aes @param {int[]} data 16-Byte state @param {int[]} expanded_key 176/208/240-Byte expanded key @returns {int[]} 16-Byte cipher """ rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 data = xor(data, expanded_key[:BLOCK_SIZE_BYTES]) for i in range(1, rounds + 1): data = sub_bytes(data) data = shift_rows(data) if i != rounds: data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX)) data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]) return data def aes_decrypt(data, expanded_key): """ Decrypt one block with aes @param {int[]} data 16-Byte cipher @param {int[]} expanded_key 176/208/240-Byte expanded key @returns {int[]} 16-Byte state """ rounds = len(expanded_key) // BLOCK_SIZE_BYTES - 1 for i in range(rounds, 0, -1): data = xor(data, expanded_key[i * BLOCK_SIZE_BYTES: (i + 1) * BLOCK_SIZE_BYTES]) if i != rounds: data = list(iter_mix_columns(data, MIX_COLUMN_MATRIX_INV)) data = shift_rows_inv(data) data = sub_bytes_inv(data) return xor(data, expanded_key[:BLOCK_SIZE_BYTES]) def aes_decrypt_text(data, password, key_size_bytes): """ Decrypt text - The first 8 Bytes of decoded 'data' are the 8 high Bytes of the counter - The cipher key is retrieved by encrypting the first 16 Byte of 'password' with the first 'key_size_bytes' Bytes from 'password' (if necessary filled with 0's) - Mode of operation is 'counter' @param {str} data Base64 encoded string @param {str,unicode} password Password (will be encoded with utf-8) @param {int} key_size_bytes Possible values: 16 for 128-Bit, 24 for 192-Bit or 32 for 256-Bit @returns {str} Decrypted data """ NONCE_LENGTH_BYTES = 8 data = bytes_to_intlist(base64.b64decode(data)) password = bytes_to_intlist(password.encode()) key = password[:key_size_bytes] + [0] * (key_size_bytes - len(password)) key = aes_encrypt(key[:BLOCK_SIZE_BYTES], key_expansion(key)) * (key_size_bytes // BLOCK_SIZE_BYTES) nonce = data[:NONCE_LENGTH_BYTES] cipher = data[NONCE_LENGTH_BYTES:] decrypted_data = aes_ctr_decrypt(cipher, key, nonce + [0] * (BLOCK_SIZE_BYTES - NONCE_LENGTH_BYTES)) return intlist_to_bytes(decrypted_data) RCON = (0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36) SBOX = (0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5, 0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76, 0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0, 0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0, 0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC, 0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15, 0x04, 0xC7, 0x23, 0xC3, 
0x18, 0x96, 0x05, 0x9A, 0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75, 0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0, 0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84, 0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B, 0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF, 0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85, 0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8, 0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5, 0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2, 0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17, 0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73, 0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88, 0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB, 0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C, 0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79, 0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9, 0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08, 0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6, 0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A, 0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E, 0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E, 0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94, 0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF, 0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68, 0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16) SBOX_INV = (0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d) MIX_COLUMN_MATRIX = ((0x2, 0x3, 0x1, 0x1), (0x1, 0x2, 0x3, 0x1), (0x1, 0x1, 0x2, 0x3), (0x3, 0x1, 0x1, 0x2)) MIX_COLUMN_MATRIX_INV = ((0xE, 0xB, 0xD, 0x9), (0x9, 0xE, 0xB, 0xD), (0xD, 0x9, 0xE, 0xB), (0xB, 0xD, 0x9, 0xE)) RIJNDAEL_EXP_TABLE = (0x01, 0x03, 0x05, 0x0F, 0x11, 0x33, 0x55, 0xFF, 0x1A, 0x2E, 0x72, 0x96, 0xA1, 0xF8, 0x13, 0x35, 0x5F, 0xE1, 0x38, 0x48, 0xD8, 0x73, 0x95, 0xA4, 0xF7, 0x02, 0x06, 0x0A, 0x1E, 0x22, 0x66, 0xAA, 0xE5, 0x34, 0x5C, 0xE4, 0x37, 0x59, 0xEB, 0x26, 0x6A, 0xBE, 0xD9, 0x70, 0x90, 0xAB, 0xE6, 0x31, 0x53, 0xF5, 0x04, 0x0C, 0x14, 0x3C, 0x44, 0xCC, 0x4F, 0xD1, 0x68, 0xB8, 0xD3, 0x6E, 0xB2, 0xCD, 0x4C, 0xD4, 0x67, 0xA9, 0xE0, 0x3B, 0x4D, 0xD7, 0x62, 0xA6, 0xF1, 0x08, 0x18, 0x28, 0x78, 0x88, 0x83, 0x9E, 0xB9, 0xD0, 0x6B, 0xBD, 0xDC, 0x7F, 0x81, 
0x98, 0xB3, 0xCE, 0x49, 0xDB, 0x76, 0x9A, 0xB5, 0xC4, 0x57, 0xF9, 0x10, 0x30, 0x50, 0xF0, 0x0B, 0x1D, 0x27, 0x69, 0xBB, 0xD6, 0x61, 0xA3, 0xFE, 0x19, 0x2B, 0x7D, 0x87, 0x92, 0xAD, 0xEC, 0x2F, 0x71, 0x93, 0xAE, 0xE9, 0x20, 0x60, 0xA0, 0xFB, 0x16, 0x3A, 0x4E, 0xD2, 0x6D, 0xB7, 0xC2, 0x5D, 0xE7, 0x32, 0x56, 0xFA, 0x15, 0x3F, 0x41, 0xC3, 0x5E, 0xE2, 0x3D, 0x47, 0xC9, 0x40, 0xC0, 0x5B, 0xED, 0x2C, 0x74, 0x9C, 0xBF, 0xDA, 0x75, 0x9F, 0xBA, 0xD5, 0x64, 0xAC, 0xEF, 0x2A, 0x7E, 0x82, 0x9D, 0xBC, 0xDF, 0x7A, 0x8E, 0x89, 0x80, 0x9B, 0xB6, 0xC1, 0x58, 0xE8, 0x23, 0x65, 0xAF, 0xEA, 0x25, 0x6F, 0xB1, 0xC8, 0x43, 0xC5, 0x54, 0xFC, 0x1F, 0x21, 0x63, 0xA5, 0xF4, 0x07, 0x09, 0x1B, 0x2D, 0x77, 0x99, 0xB0, 0xCB, 0x46, 0xCA, 0x45, 0xCF, 0x4A, 0xDE, 0x79, 0x8B, 0x86, 0x91, 0xA8, 0xE3, 0x3E, 0x42, 0xC6, 0x51, 0xF3, 0x0E, 0x12, 0x36, 0x5A, 0xEE, 0x29, 0x7B, 0x8D, 0x8C, 0x8F, 0x8A, 0x85, 0x94, 0xA7, 0xF2, 0x0D, 0x17, 0x39, 0x4B, 0xDD, 0x7C, 0x84, 0x97, 0xA2, 0xFD, 0x1C, 0x24, 0x6C, 0xB4, 0xC7, 0x52, 0xF6, 0x01) RIJNDAEL_LOG_TABLE = (0x00, 0x00, 0x19, 0x01, 0x32, 0x02, 0x1a, 0xc6, 0x4b, 0xc7, 0x1b, 0x68, 0x33, 0xee, 0xdf, 0x03, 0x64, 0x04, 0xe0, 0x0e, 0x34, 0x8d, 0x81, 0xef, 0x4c, 0x71, 0x08, 0xc8, 0xf8, 0x69, 0x1c, 0xc1, 0x7d, 0xc2, 0x1d, 0xb5, 0xf9, 0xb9, 0x27, 0x6a, 0x4d, 0xe4, 0xa6, 0x72, 0x9a, 0xc9, 0x09, 0x78, 0x65, 0x2f, 0x8a, 0x05, 0x21, 0x0f, 0xe1, 0x24, 0x12, 0xf0, 0x82, 0x45, 0x35, 0x93, 0xda, 0x8e, 0x96, 0x8f, 0xdb, 0xbd, 0x36, 0xd0, 0xce, 0x94, 0x13, 0x5c, 0xd2, 0xf1, 0x40, 0x46, 0x83, 0x38, 0x66, 0xdd, 0xfd, 0x30, 0xbf, 0x06, 0x8b, 0x62, 0xb3, 0x25, 0xe2, 0x98, 0x22, 0x88, 0x91, 0x10, 0x7e, 0x6e, 0x48, 0xc3, 0xa3, 0xb6, 0x1e, 0x42, 0x3a, 0x6b, 0x28, 0x54, 0xfa, 0x85, 0x3d, 0xba, 0x2b, 0x79, 0x0a, 0x15, 0x9b, 0x9f, 0x5e, 0xca, 0x4e, 0xd4, 0xac, 0xe5, 0xf3, 0x73, 0xa7, 0x57, 0xaf, 0x58, 0xa8, 0x50, 0xf4, 0xea, 0xd6, 0x74, 0x4f, 0xae, 0xe9, 0xd5, 0xe7, 0xe6, 0xad, 0xe8, 0x2c, 0xd7, 0x75, 0x7a, 0xeb, 0x16, 0x0b, 0xf5, 0x59, 0xcb, 0x5f, 0xb0, 0x9c, 0xa9, 0x51, 0xa0, 0x7f, 0x0c, 0xf6, 0x6f, 0x17, 0xc4, 0x49, 0xec, 0xd8, 0x43, 0x1f, 0x2d, 0xa4, 0x76, 0x7b, 0xb7, 0xcc, 0xbb, 0x3e, 0x5a, 0xfb, 0x60, 0xb1, 0x86, 0x3b, 0x52, 0xa1, 0x6c, 0xaa, 0x55, 0x29, 0x9d, 0x97, 0xb2, 0x87, 0x90, 0x61, 0xbe, 0xdc, 0xfc, 0xbc, 0x95, 0xcf, 0xcd, 0x37, 0x3f, 0x5b, 0xd1, 0x53, 0x39, 0x84, 0x3c, 0x41, 0xa2, 0x6d, 0x47, 0x14, 0x2a, 0x9e, 0x5d, 0x56, 0xf2, 0xd3, 0xab, 0x44, 0x11, 0x92, 0xd9, 0x23, 0x20, 0x2e, 0x89, 0xb4, 0x7c, 0xb8, 0x26, 0x77, 0x99, 0xe3, 0xa5, 0x67, 0x4a, 0xed, 0xde, 0xc5, 0x31, 0xfe, 0x18, 0x0d, 0x63, 0x8c, 0x80, 0xc0, 0xf7, 0x70, 0x07) def key_expansion(data): """ Generate key schedule @param {int[]} data 16/24/32-Byte cipher key @returns {int[]} 176/208/240-Byte expanded key """ data = data[:] # copy rcon_iteration = 1 key_size_bytes = len(data) expanded_key_size_bytes = (key_size_bytes // 4 + 7) * BLOCK_SIZE_BYTES while len(data) < expanded_key_size_bytes: temp = data[-4:] temp = key_schedule_core(temp, rcon_iteration) rcon_iteration += 1 data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) for _ in range(3): temp = data[-4:] data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) if key_size_bytes == 32: temp = data[-4:] temp = sub_bytes(temp) data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) for _ in range(3 if key_size_bytes == 32 else 2 if key_size_bytes == 24 else 0): temp = data[-4:] data += xor(temp, data[-key_size_bytes: 4 - key_size_bytes]) return data[:expanded_key_size_bytes] def iter_vector(iv): while True: yield iv iv = inc(iv) def sub_bytes(data): return [SBOX[x] for x in 
data]


def sub_bytes_inv(data):
    return [SBOX_INV[x] for x in data]


def rotate(data):
    return data[1:] + [data[0]]


def key_schedule_core(data, rcon_iteration):
    data = rotate(data)
    data = sub_bytes(data)
    data[0] = data[0] ^ RCON[rcon_iteration]
    return data


def xor(data1, data2):
    return [x ^ y for x, y in zip(data1, data2)]


def iter_mix_columns(data, matrix):
    for i in (0, 4, 8, 12):
        for row in matrix:
            mixed = 0
            for j in range(4):
                # xor is (+) and (-)
                mixed ^= (0 if data[i:i + 4][j] == 0 or row[j] == 0 else
                          RIJNDAEL_EXP_TABLE[(RIJNDAEL_LOG_TABLE[data[i + j]] + RIJNDAEL_LOG_TABLE[row[j]]) % 0xFF])
            yield mixed


def shift_rows(data):
    return [data[((column + row) & 0b11) * 4 + row] for column in range(4) for row in range(4)]


def shift_rows_inv(data):
    return [data[((column - row) & 0b11) * 4 + row] for column in range(4) for row in range(4)]


def shift_block(data):
    data_shifted = []

    bit = 0
    for n in data:
        if bit:
            n |= 0x100
        bit = n & 1
        n >>= 1
        data_shifted.append(n)

    return data_shifted


def inc(data):
    data = data[:]  # copy
    for i in range(len(data) - 1, -1, -1):
        if data[i] == 255:
            data[i] = 0
        else:
            data[i] = data[i] + 1
            break
    return data


def block_product(block_x, block_y):
    # NIST SP 800-38D, Algorithm 1
    if len(block_x) != BLOCK_SIZE_BYTES or len(block_y) != BLOCK_SIZE_BYTES:
        raise ValueError(f'Length of blocks must be {BLOCK_SIZE_BYTES} bytes')

    block_r = [0xE1] + [0] * (BLOCK_SIZE_BYTES - 1)
    block_v = block_y[:]
    block_z = [0] * BLOCK_SIZE_BYTES

    for i in block_x:
        for bit in range(7, -1, -1):
            if i & (1 << bit):
                block_z = xor(block_z, block_v)

            do_xor = block_v[-1] & 1
            block_v = shift_block(block_v)
            if do_xor:
                block_v = xor(block_v, block_r)

    return block_z


def ghash(subkey, data):
    # NIST SP 800-38D, Algorithm 2
    if len(data) % BLOCK_SIZE_BYTES:
        raise ValueError(f'Length of data must be a multiple of {BLOCK_SIZE_BYTES} bytes')

    last_y = [0] * BLOCK_SIZE_BYTES
    for i in range(0, len(data), BLOCK_SIZE_BYTES):
        block = data[i: i + BLOCK_SIZE_BYTES]
        last_y = block_product(xor(last_y, block), subkey)

    return last_y


__all__ = [
    'aes_cbc_decrypt',
    'aes_cbc_decrypt_bytes',
    'aes_ctr_decrypt',
    'aes_decrypt_text',
    'aes_decrypt',
    'aes_ecb_decrypt',
    'aes_gcm_decrypt_and_verify',
    'aes_gcm_decrypt_and_verify_bytes',
    'aes_cbc_encrypt',
    'aes_cbc_encrypt_bytes',
    'aes_ctr_encrypt',
    'aes_ecb_encrypt',
    'aes_encrypt',
    'key_expansion',
    'pad_block',
    'pkcs7_padding',
    'unpad_pkcs7',
]
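# Usage sketch (illustrative, not part of upstream yt-dlp): exercises the
# helpers above under this module's list-of-ints convention. The key, IV and
# plaintext values are arbitrary demo data.
if __name__ == '__main__':
    _key = list(b'0123456789abcdef')  # 16-byte key -> (16 // 4 + 7) * 16 = 176-byte schedule
    assert len(key_expansion(_key)) == 176

    _iv = [0] * BLOCK_SIZE_BYTES
    _plaintext = list(b'attack at dawn!!')
    _ciphertext = aes_ctr_encrypt(_plaintext, _key, _iv)
    # CTR mode applies the same keystream XOR for encryption and decryption
    assert aes_ctr_decrypt(_ciphertext, _key, _iv) == _plaintext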
yt-dlp-2024.09.27/yt_dlp/cache.py000066400000000000000000000064521467563447100162710ustar00rootroot00000000000000import contextlib
import json
import os
import re
import shutil
import traceback
import urllib.parse

from .utils import expand_path, traverse_obj, version_tuple, write_json_file
from .version import __version__


class Cache:
    def __init__(self, ydl):
        self._ydl = ydl

    def _get_root_dir(self):
        res = self._ydl.params.get('cachedir')
        if res is None:
            cache_root = os.getenv('XDG_CACHE_HOME', '~/.cache')
            res = os.path.join(cache_root, 'yt-dlp')
        return expand_path(res)

    def _get_cache_fn(self, section, key, dtype):
        assert re.match(r'^[\w.-]+$', section), f'invalid section {section!r}'
        key = urllib.parse.quote(key, safe='').replace('%', ',')  # encode non-ascii characters
        return os.path.join(self._get_root_dir(), section, f'{key}.{dtype}')

    @property
    def enabled(self):
        return self._ydl.params.get('cachedir') is not False

    def store(self, section, key, data, dtype='json'):
        assert dtype in ('json',)
        if not self.enabled:
            return

        fn = self._get_cache_fn(section, key, dtype)
        try:
            os.makedirs(os.path.dirname(fn), exist_ok=True)
            self._ydl.write_debug(f'Saving {section}.{key} to cache')
            write_json_file({'yt-dlp_version': __version__, 'data': data}, fn)
        except Exception:
            tb = traceback.format_exc()
            self._ydl.report_warning(f'Writing cache to {fn!r} failed: {tb}')

    def _validate(self, data, min_ver):
        version = traverse_obj(data, 'yt-dlp_version')
        if not version:  # Backward compatibility
            data, version = {'data': data}, '2022.08.19'
        if not min_ver or version_tuple(version) >= version_tuple(min_ver):
            return data['data']
        self._ydl.write_debug(f'Discarding old cache from version {version} (needs {min_ver})')

    def load(self, section, key, dtype='json', default=None, *, min_ver=None):
        assert dtype in ('json',)
        if not self.enabled:
            return default

        cache_fn = self._get_cache_fn(section, key, dtype)
        with contextlib.suppress(OSError):
            try:
                with open(cache_fn, encoding='utf-8') as cachef:
                    self._ydl.write_debug(f'Loading {section}.{key} from cache')
                    return self._validate(json.load(cachef), min_ver)
            except (ValueError, KeyError):
                try:
                    file_size = os.path.getsize(cache_fn)
                except OSError as oe:
                    file_size = str(oe)
                self._ydl.report_warning(f'Cache retrieval from {cache_fn} failed ({file_size})')

        return default

    def remove(self):
        if not self.enabled:
            self._ydl.to_screen('Cache is disabled (Did you combine --no-cache-dir and --rm-cache-dir?)')
            return

        cachedir = self._get_root_dir()
        if not any((term in cachedir) for term in ('cache', 'tmp')):
            raise Exception(f'Not removing directory {cachedir} - this does not look like a cache dir')

        self._ydl.to_screen(
            f'Removing cache dir {cachedir} .', skip_eol=True)
        if os.path.exists(cachedir):
            self._ydl.to_screen('.', skip_eol=True)
            shutil.rmtree(cachedir)
        self._ydl.to_screen('.')
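# Usage sketch (illustrative, not part of upstream yt-dlp): Cache only needs a
# YoutubeDL-like object exposing `params` and a few logging methods, so a tiny
# stand-in is enough to demonstrate the store/load round trip. `_FakeYDL` and
# the 'demo'/'token' section and key names are hypothetical.
if __name__ == '__main__':
    import tempfile

    class _FakeYDL:
        def __init__(self, cachedir):
            self.params = {'cachedir': cachedir}

        def write_debug(self, message):
            print(f'[debug] {message}')

        def report_warning(self, message):
            print(f'WARNING: {message}')

        def to_screen(self, message, skip_eol=False):
            print(message, end='' if skip_eol else '\n')

    with tempfile.TemporaryDirectory() as cachedir:
        cache = Cache(_FakeYDL(cachedir))
        cache.store('demo', 'token', {'value': 42})
        # Entries are versioned; `min_ver` lets callers discard caches written
        # by older yt-dlp versions
        assert cache.load('demo', 'token') == {'value': 42}
        # A missing entry falls back to the supplied default
        assert cache.load('demo', 'missing', default='fallback') == 'fallback'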
yt-dlp-2024.09.27/yt_dlp/compat/000077500000000000000000000000001467563447100161315ustar00rootroot00000000000000yt-dlp-2024.09.27/yt_dlp/compat/__init__.py000066400000000000000000000046271467563447100202530ustar00rootroot00000000000000import os
import sys
import xml.etree.ElementTree as etree

from .compat_utils import passthrough_module

passthrough_module(__name__, '._deprecated')
del passthrough_module


# HTMLParseError was deprecated in Python 3.3 and removed in Python 3.5.
# Introduce a dummy exception for Python >3.5 so that exception handling
# stays compatible and uniform across versions
class compat_HTMLParseError(ValueError):
    pass


class _TreeBuilder(etree.TreeBuilder):
    def doctype(self, name, pubid, system):
        pass


def compat_etree_fromstring(text):
    return etree.XML(text, parser=etree.XMLParser(target=_TreeBuilder()))


compat_os_name = os._name if os.name == 'java' else os.name


def compat_shlex_quote(s):
    from ..utils import shell_quote
    return shell_quote(s)


def compat_ord(c):
    return c if isinstance(c, int) else ord(c)


if compat_os_name == 'nt' and sys.version_info < (3, 8):
    # os.path.realpath on Windows does not follow symbolic links
    # prior to Python 3.8 (see https://bugs.python.org/issue9949)
    def compat_realpath(path):
        while os.path.islink(path):
            path = os.path.abspath(os.readlink(path))
        return os.path.realpath(path)
else:
    compat_realpath = os.path.realpath


# Python 3.8+ does not honor %HOME% on windows, but this breaks compatibility with youtube-dl
# See https://github.com/yt-dlp/yt-dlp/issues/792
# https://docs.python.org/3/library/os.path.html#os.path.expanduser
if compat_os_name in ('nt', 'ce'):
    def compat_expanduser(path):
        HOME = os.environ.get('HOME')
        if not HOME:
            return os.path.expanduser(path)
        elif not path.startswith('~'):
            return path
        i = path.replace('\\', '/', 1).find('/')  # ~user
        if i < 0:
            i = len(path)
        userhome = os.path.join(os.path.dirname(HOME), path[1:i]) if i > 1 else HOME
        return userhome + path[i:]
else:
    compat_expanduser = os.path.expanduser


def urllib_req_to_req(urllib_request):
    """Convert urllib Request to a networking Request"""
    from ..networking import Request
    from ..utils.networking import HTTPHeaderDict
    return Request(
        urllib_request.get_full_url(), data=urllib_request.data, method=urllib_request.get_method(),
        headers=HTTPHeaderDict(urllib_request.headers, urllib_request.unredirected_hdrs),
        extensions={'timeout': urllib_request.timeout} if hasattr(urllib_request, 'timeout') else None)
yt-dlp-2024.09.27/yt_dlp/compat/_deprecated.py000066400000000000000000000013141467563447100207410ustar00rootroot00000000000000"""Deprecated - New code should avoid these"""
import warnings

from .compat_utils import passthrough_module

# XXX: Implement this the same way as other DeprecationWarnings without circular import
passthrough_module(__name__, '.._legacy', callback=lambda attr: warnings.warn(
    DeprecationWarning(f'{__name__}.{attr} is deprecated'), stacklevel=6))
del passthrough_module

import base64
import urllib.error
import urllib.parse

compat_str = str

compat_b64decode = base64.b64decode

compat_urlparse = urllib.parse
compat_parse_qs = urllib.parse.parse_qs
compat_urllib_parse_unquote = urllib.parse.unquote
compat_urllib_parse_urlencode = urllib.parse.urlencode
compat_urllib_parse_urlparse = urllib.parse.urlparse
yt-dlp-2024.09.27/yt_dlp/compat/_legacy.py000066400000000000000000000075721467563447100201150ustar00rootroot00000000000000"""
Do not use!
""" import base64 import collections import ctypes import getpass import html.entities import html.parser import http.client import http.cookiejar import http.cookies import http.server import itertools import os import shlex import shutil import socket import struct import subprocess import tokenize import urllib.error import urllib.parse import urllib.request import xml.etree.ElementTree as etree # isort: split import asyncio # noqa: F401 import re # noqa: F401 from asyncio import run as compat_asyncio_run # noqa: F401 from re import Pattern as compat_Pattern # noqa: F401 from re import match as compat_Match # noqa: F401 from . import compat_expanduser, compat_HTMLParseError, compat_realpath from .compat_utils import passthrough_module from ..dependencies import brotli as compat_brotli # noqa: F401 from ..dependencies import websockets as compat_websockets # noqa: F401 from ..dependencies.Cryptodome import AES as compat_pycrypto_AES # noqa: F401 from ..networking.exceptions import HTTPError as compat_HTTPError passthrough_module(__name__, '...utils', ('WINDOWS_VT_MODE', 'windows_enable_vt_mode')) # compat_ctypes_WINFUNCTYPE = ctypes.WINFUNCTYPE # will not work since ctypes.WINFUNCTYPE does not exist in UNIX machines def compat_ctypes_WINFUNCTYPE(*args, **kwargs): return ctypes.WINFUNCTYPE(*args, **kwargs) def compat_setenv(key, value, env=os.environ): env[key] = value compat_base64_b64decode = base64.b64decode compat_basestring = str compat_casefold = str.casefold compat_chr = chr compat_collections_abc = collections.abc compat_cookiejar = compat_http_cookiejar = http.cookiejar compat_cookiejar_Cookie = compat_http_cookiejar_Cookie = http.cookiejar.Cookie compat_cookies = compat_http_cookies = http.cookies compat_cookies_SimpleCookie = compat_http_cookies_SimpleCookie = http.cookies.SimpleCookie compat_etree_Element = compat_xml_etree_ElementTree_Element = etree.Element compat_etree_register_namespace = compat_xml_etree_register_namespace = etree.register_namespace compat_filter = filter compat_get_terminal_size = shutil.get_terminal_size compat_getenv = os.getenv compat_getpass = compat_getpass_getpass = getpass.getpass compat_html_entities = html.entities compat_html_entities_html5 = html.entities.html5 compat_html_parser_HTMLParseError = compat_HTMLParseError compat_HTMLParser = compat_html_parser_HTMLParser = html.parser.HTMLParser compat_http_client = http.client compat_http_server = http.server compat_input = input compat_integer_types = (int, ) compat_itertools_count = itertools.count compat_kwargs = lambda kwargs: kwargs compat_map = map compat_numeric_types = (int, float, complex) compat_os_path_expanduser = compat_expanduser compat_os_path_realpath = compat_realpath compat_print = print compat_shlex_split = shlex.split compat_socket_create_connection = socket.create_connection compat_Struct = struct.Struct compat_struct_pack = struct.pack compat_struct_unpack = struct.unpack compat_subprocess_get_DEVNULL = lambda: subprocess.DEVNULL compat_tokenize_tokenize = tokenize.tokenize compat_urllib_error = urllib.error compat_urllib_HTTPError = compat_HTTPError compat_urllib_parse = urllib.parse compat_urllib_parse_parse_qs = urllib.parse.parse_qs compat_urllib_parse_quote = urllib.parse.quote compat_urllib_parse_quote_plus = urllib.parse.quote_plus compat_urllib_parse_unquote_plus = urllib.parse.unquote_plus compat_urllib_parse_unquote_to_bytes = urllib.parse.unquote_to_bytes compat_urllib_parse_urlunparse = urllib.parse.urlunparse compat_urllib_request = urllib.request 
compat_urllib_request_DataHandler = urllib.request.DataHandler compat_urllib_response = urllib.response compat_urlretrieve = compat_urllib_request_urlretrieve = urllib.request.urlretrieve compat_xml_parse_error = compat_xml_etree_ElementTree_ParseError = etree.ParseError compat_xpath = lambda xpath: xpath compat_zip = zip workaround_optparse_bug9161 = lambda: None legacy = [] yt-dlp-2024.09.27/yt_dlp/compat/compat_utils.py000066400000000000000000000050451467563447100212120ustar00rootroot00000000000000import collections import contextlib import functools import importlib import sys import types _NO_ATTRIBUTE = object() _Package = collections.namedtuple('Package', ('name', 'version')) def get_package_info(module): return _Package( name=getattr(module, '_yt_dlp__identifier', module.__name__), version=str(next(filter(None, ( getattr(module, attr, None) for attr in ('_yt_dlp__version', '__version__', 'version_string', 'version') )), None))) def _is_package(module): return '__path__' in vars(module) def _is_dunder(name): return name.startswith('__') and name.endswith('__') class EnhancedModule(types.ModuleType): def __bool__(self): return vars(self).get('__bool__', lambda: True)() def __getattribute__(self, attr): try: ret = super().__getattribute__(attr) except AttributeError: if _is_dunder(attr): raise getter = getattr(self, '__getattr__', None) if not getter: raise ret = getter(attr) return ret.fget() if isinstance(ret, property) else ret def passthrough_module(parent, child, allowed_attributes=(..., ), *, callback=lambda _: None): """Passthrough parent module into a child module, creating the parent if necessary""" def __getattr__(attr): if _is_package(parent): with contextlib.suppress(ModuleNotFoundError): return importlib.import_module(f'.{attr}', parent.__name__) ret = from_child(attr) if ret is _NO_ATTRIBUTE: raise AttributeError(f'module {parent.__name__} has no attribute {attr}') callback(attr) return ret @functools.lru_cache(maxsize=None) def from_child(attr): nonlocal child if attr not in allowed_attributes: if ... 
not in allowed_attributes or _is_dunder(attr): return _NO_ATTRIBUTE if isinstance(child, str): child = importlib.import_module(child, parent.__name__) if _is_package(child): with contextlib.suppress(ImportError): return passthrough_module(f'{parent.__name__}.{attr}', importlib.import_module(f'.{attr}', child.__name__)) with contextlib.suppress(AttributeError): return getattr(child, attr) return _NO_ATTRIBUTE parent = sys.modules.get(parent, types.ModuleType(parent)) parent.__class__ = EnhancedModule parent.__getattr__ = __getattr__ return parent yt-dlp-2024.09.27/yt_dlp/compat/functools.py000066400000000000000000000003771467563447100205260ustar00rootroot00000000000000# flake8: noqa: F405 from functools import * # noqa: F403 from .compat_utils import passthrough_module passthrough_module(__name__, 'functools') del passthrough_module try: _ = cache # >= 3.9 except NameError: cache = lru_cache(maxsize=None) yt-dlp-2024.09.27/yt_dlp/compat/imghdr.py000066400000000000000000000011061467563447100177530ustar00rootroot00000000000000def what(file=None, h=None): """Detect format of image (Currently supports jpeg, png, webp, gif only) Ref: https://github.com/python/cpython/blob/3.11/Lib/imghdr.py Ref: https://www.w3.org/Graphics/JPEG/itu-t81.pdf """ if h is None: with open(file, 'rb') as f: h = f.read(12) if h.startswith(b'RIFF') and h.startswith(b'WEBP', 8): return 'webp' if h.startswith(b'\x89PNG'): return 'png' if h.startswith(b'\xFF\xD8\xFF'): return 'jpeg' if h.startswith(b'GIF'): return 'gif' return None yt-dlp-2024.09.27/yt_dlp/compat/shutil.py000066400000000000000000000015331467563447100200150ustar00rootroot00000000000000# flake8: noqa: F405 from shutil import * # noqa: F403 from .compat_utils import passthrough_module passthrough_module(__name__, 'shutil') del passthrough_module import sys if sys.platform.startswith('freebsd'): import errno import os import shutil # Workaround for PermissionError when using restricted ACL mode on FreeBSD def copy2(src, dst, *args, **kwargs): if os.path.isdir(dst): dst = os.path.join(dst, os.path.basename(src)) shutil.copyfile(src, dst, *args, **kwargs) try: shutil.copystat(src, dst, *args, **kwargs) except PermissionError as e: if e.errno != getattr(errno, 'EPERM', None): raise return dst def move(*args, copy_function=copy2, **kwargs): return shutil.move(*args, copy_function=copy_function, **kwargs) yt-dlp-2024.09.27/yt_dlp/compat/types.py000066400000000000000000000005071467563447100176510ustar00rootroot00000000000000# flake8: noqa: F405 from types import * # noqa: F403 from .compat_utils import passthrough_module passthrough_module(__name__, 'types') del passthrough_module try: # NB: pypy has builtin NoneType, so checking NameError won't work from types import NoneType # >= 3.10 except ImportError: NoneType = type(None) yt-dlp-2024.09.27/yt_dlp/compat/urllib/000077500000000000000000000000001467563447100174225ustar00rootroot00000000000000yt-dlp-2024.09.27/yt_dlp/compat/urllib/__init__.py000066400000000000000000000003451467563447100215350ustar00rootroot00000000000000# flake8: noqa: F405 from urllib import * # noqa: F403 del request # noqa: F821 from . 
import request # noqa: F401 from ..compat_utils import passthrough_module passthrough_module(__name__, 'urllib') del passthrough_module yt-dlp-2024.09.27/yt_dlp/compat/urllib/request.py000066400000000000000000000032331467563447100214650ustar00rootroot00000000000000# flake8: noqa: F405 from urllib.request import * # noqa: F403 from ..compat_utils import passthrough_module passthrough_module(__name__, 'urllib.request') del passthrough_module from .. import compat_os_name if compat_os_name == 'nt': # On older Python versions, proxies are extracted from Windows registry erroneously. [1] # If the https proxy in the registry does not have a scheme, urllib will incorrectly add https:// to it. [2] # It is unlikely that the user has actually set it to be https, so we should be fine to safely downgrade # it to http on these older Python versions to avoid issues # This also applies for ftp proxy type, as ftp:// proxy scheme is not supported. # 1: https://github.com/python/cpython/issues/86793 # 2: https://github.com/python/cpython/blob/51f1ae5ceb0673316c4e4b0175384e892e33cc6e/Lib/urllib/request.py#L2683-L2698 import sys from urllib.request import getproxies_environment, getproxies_registry def getproxies_registry_patched(): proxies = getproxies_registry() if ( sys.version_info >= (3, 10, 5) # https://docs.python.org/3.10/whatsnew/changelog.html#python-3-10-5-final or (3, 9, 13) <= sys.version_info < (3, 10) # https://docs.python.org/3.9/whatsnew/changelog.html#python-3-9-13-final ): return proxies for scheme in ('https', 'ftp'): if scheme in proxies and proxies[scheme].startswith(f'{scheme}://'): proxies[scheme] = 'http' + proxies[scheme][len(scheme):] return proxies def getproxies(): return getproxies_environment() or getproxies_registry_patched() del compat_os_name yt-dlp-2024.09.27/yt_dlp/cookies.py000066400000000000000000001537741467563447100166750ustar00rootroot00000000000000import base64 import collections import contextlib import datetime as dt import functools import glob import hashlib import http.cookiejar import http.cookies import io import json import os import re import shutil import struct import subprocess import sys import tempfile import time import urllib.request from enum import Enum, auto from .aes import ( aes_cbc_decrypt_bytes, aes_gcm_decrypt_and_verify_bytes, unpad_pkcs7, ) from .compat import compat_os_name from .dependencies import ( _SECRETSTORAGE_UNAVAILABLE_REASON, secretstorage, sqlite3, ) from .minicurses import MultilinePrinter, QuietMultilinePrinter from .utils import ( DownloadError, Popen, error_to_str, expand_path, is_path_like, sanitize_url, str_or_none, try_call, write_string, ) from .utils._utils import _YDLLogger from .utils.networking import normalize_url CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'} SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'} class YDLLogger(_YDLLogger): def warning(self, message, only_once=False): # compat return super().warning(message, once=only_once) class ProgressBar(MultilinePrinter): _DELAY, _timer = 0.1, 0 def print(self, message): if time.time() - self._timer > self._DELAY: self.print_at_line(f'[Cookies] {message}', 0) self._timer = time.time() def progress_bar(self): """Return a context manager with a print method. 
(Optional)""" # Do not print to files/pipes, loggers, or when --no-progress is used if not self._ydl or self._ydl.params.get('noprogress') or self._ydl.params.get('logger'): return file = self._ydl._out_files.error try: if not file.isatty(): return except BaseException: return return self.ProgressBar(file, preserve_output=False) def _create_progress_bar(logger): if hasattr(logger, 'progress_bar'): printer = logger.progress_bar() if printer: return printer printer = QuietMultilinePrinter() printer.print = lambda _: None return printer def load_cookies(cookie_file, browser_specification, ydl): cookie_jars = [] if browser_specification is not None: browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification) cookie_jars.append( extract_cookies_from_browser(browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)) if cookie_file is not None: is_filename = is_path_like(cookie_file) if is_filename: cookie_file = expand_path(cookie_file) jar = YoutubeDLCookieJar(cookie_file) if not is_filename or os.access(cookie_file, os.R_OK): jar.load() cookie_jars.append(jar) return _merge_cookie_jars(cookie_jars) def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None): if browser_name == 'firefox': return _extract_firefox_cookies(profile, container, logger) elif browser_name == 'safari': return _extract_safari_cookies(profile, logger) elif browser_name in CHROMIUM_BASED_BROWSERS: return _extract_chrome_cookies(browser_name, profile, keyring, logger) else: raise ValueError(f'unknown browser: {browser_name}') def _extract_firefox_cookies(profile, container, logger): logger.info('Extracting cookies from firefox') if not sqlite3: logger.warning('Cannot extract cookies from firefox without sqlite3 support. 
' 'Please use a Python interpreter compiled with sqlite3 support') return YoutubeDLCookieJar() if profile is None: search_roots = list(_firefox_browser_dirs()) elif _is_path(profile): search_roots = [profile] else: search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()] search_root = ', '.join(map(repr, search_roots)) cookie_database_path = _newest(_firefox_cookie_dbs(search_roots)) if cookie_database_path is None: raise FileNotFoundError(f'could not find firefox cookies database in {search_root}') logger.debug(f'Extracting cookies from: "{cookie_database_path}"') container_id = None if container not in (None, 'none'): containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json') if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK): raise FileNotFoundError(f'could not read containers.json in {search_root}') with open(containers_path, encoding='utf8') as containers: identities = json.load(containers).get('identities', []) container_id = next((context.get('userContextId') for context in identities if container in ( context.get('name'), try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']).group()), )), None) if not isinstance(container_id, int): raise ValueError(f'could not find firefox container "{container}" in containers.json') with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: cursor = None try: cursor = _open_database_copy(cookie_database_path, tmpdir) if isinstance(container_id, int): logger.debug( f'Only loading cookies from firefox container "{container}", ID {container_id}') cursor.execute( 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?', (f'%userContextId={container_id}', f'%userContextId={container_id}&%')) elif container == 'none': logger.debug('Only loading cookies not belonging to any container') cursor.execute( 'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")') else: cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies') jar = YoutubeDLCookieJar() with _create_progress_bar(logger) as progress_bar: table = cursor.fetchall() total_cookie_count = len(table) for i, (host, name, value, path, expiry, is_secure) in enumerate(table): progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}') cookie = http.cookiejar.Cookie( version=0, name=name, value=value, port=None, port_specified=False, domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'), path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False, comment=None, comment_url=None, rest={}) jar.set_cookie(cookie) logger.info(f'Extracted {len(jar)} cookies from firefox') return jar finally: if cursor is not None: cursor.connection.close() def _firefox_browser_dirs(): if sys.platform in ('cygwin', 'win32'): yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles') elif sys.platform == 'darwin': yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles') else: yield from map(os.path.expanduser, ( '~/.mozilla/firefox', '~/snap/firefox/common/.mozilla/firefox', '~/.var/app/org.mozilla.firefox/.mozilla/firefox', )) def _firefox_cookie_dbs(roots): for root in map(os.path.abspath, roots): for pattern in ('', '*/', 'Profiles/*/'): yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite')) def _get_chromium_based_browser_settings(browser_name): # 
https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md if sys.platform in ('cygwin', 'win32'): appdata_local = os.path.expandvars('%LOCALAPPDATA%') appdata_roaming = os.path.expandvars('%APPDATA%') browser_dir = { 'brave': os.path.join(appdata_local, R'BraveSoftware\Brave-Browser\User Data'), 'chrome': os.path.join(appdata_local, R'Google\Chrome\User Data'), 'chromium': os.path.join(appdata_local, R'Chromium\User Data'), 'edge': os.path.join(appdata_local, R'Microsoft\Edge\User Data'), 'opera': os.path.join(appdata_roaming, R'Opera Software\Opera Stable'), 'vivaldi': os.path.join(appdata_local, R'Vivaldi\User Data'), 'whale': os.path.join(appdata_local, R'Naver\Naver Whale\User Data'), }[browser_name] elif sys.platform == 'darwin': appdata = os.path.expanduser('~/Library/Application Support') browser_dir = { 'brave': os.path.join(appdata, 'BraveSoftware/Brave-Browser'), 'chrome': os.path.join(appdata, 'Google/Chrome'), 'chromium': os.path.join(appdata, 'Chromium'), 'edge': os.path.join(appdata, 'Microsoft Edge'), 'opera': os.path.join(appdata, 'com.operasoftware.Opera'), 'vivaldi': os.path.join(appdata, 'Vivaldi'), 'whale': os.path.join(appdata, 'Naver/Whale'), }[browser_name] else: config = _config_home() browser_dir = { 'brave': os.path.join(config, 'BraveSoftware/Brave-Browser'), 'chrome': os.path.join(config, 'google-chrome'), 'chromium': os.path.join(config, 'chromium'), 'edge': os.path.join(config, 'microsoft-edge'), 'opera': os.path.join(config, 'opera'), 'vivaldi': os.path.join(config, 'vivaldi'), 'whale': os.path.join(config, 'naver-whale'), }[browser_name] # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE: # dbus-monitor "interface='org.kde.KWallet'" "type=method_return" keyring_name = { 'brave': 'Brave', 'chrome': 'Chrome', 'chromium': 'Chromium', 'edge': 'Microsoft Edge' if sys.platform == 'darwin' else 'Chromium', 'opera': 'Opera' if sys.platform == 'darwin' else 'Chromium', 'vivaldi': 'Vivaldi' if sys.platform == 'darwin' else 'Chrome', 'whale': 'Whale', }[browser_name] browsers_without_profiles = {'opera'} return { 'browser_dir': browser_dir, 'keyring_name': keyring_name, 'supports_profiles': browser_name not in browsers_without_profiles, } def _extract_chrome_cookies(browser_name, profile, keyring, logger): logger.info(f'Extracting cookies from {browser_name}') if not sqlite3: logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. 
' 'Please use a Python interpreter compiled with sqlite3 support') return YoutubeDLCookieJar() config = _get_chromium_based_browser_settings(browser_name) if profile is None: search_root = config['browser_dir'] elif _is_path(profile): search_root = profile config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile else: if config['supports_profiles']: search_root = os.path.join(config['browser_dir'], profile) else: logger.error(f'{browser_name} does not support profiles') search_root = config['browser_dir'] cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger)) if cookie_database_path is None: raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"') logger.debug(f'Extracting cookies from: "{cookie_database_path}"') decryptor = get_cookie_decryptor(config['browser_dir'], config['keyring_name'], logger, keyring=keyring) with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir: cursor = None try: cursor = _open_database_copy(cookie_database_path, tmpdir) cursor.connection.text_factory = bytes column_names = _get_column_names(cursor, 'cookies') secure_column = 'is_secure' if 'is_secure' in column_names else 'secure' cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies') jar = YoutubeDLCookieJar() failed_cookies = 0 unencrypted_cookies = 0 with _create_progress_bar(logger) as progress_bar: table = cursor.fetchall() total_cookie_count = len(table) for i, line in enumerate(table): progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}') is_encrypted, cookie = _process_chrome_cookie(decryptor, *line) if not cookie: failed_cookies += 1 continue elif not is_encrypted: unencrypted_cookies += 1 jar.set_cookie(cookie) if failed_cookies > 0: failed_message = f' ({failed_cookies} could not be decrypted)' else: failed_message = '' logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}') counts = decryptor._cookie_counts.copy() counts['unencrypted'] = unencrypted_cookies logger.debug(f'cookie version breakdown: {counts}') return jar except PermissionError as error: if compat_os_name == 'nt' and error.errno == 13: message = 'Could not copy Chrome cookie database. 
See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info' logger.error(message) raise DownloadError(message) # force exit raise finally: if cursor is not None: cursor.connection.close() def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure): host_key = host_key.decode() name = name.decode() value = value.decode() path = path.decode() is_encrypted = not value and encrypted_value if is_encrypted: value = decryptor.decrypt(encrypted_value) if value is None: return is_encrypted, None # In chrome, session cookies have expires_utc set to 0 # In our cookie-store, cookies that do not expire should have expires set to None if not expires_utc: expires_utc = None return is_encrypted, http.cookiejar.Cookie( version=0, name=name, value=value, port=None, port_specified=False, domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'), path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False, comment=None, comment_url=None, rest={}) class ChromeCookieDecryptor: """ Overview: Linux: - cookies are either v10 or v11 - v10: AES-CBC encrypted with a fixed key - also attempts empty password if decryption fails - v11: AES-CBC encrypted with an OS protected key (keyring) - also attempts empty password if decryption fails - v11 keys can be stored in various places depending on the activate desktop environment [2] Mac: - cookies are either v10 or not v10 - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux - not v10: 'old data' stored as plaintext Windows: - cookies are either v10 or not v10 - v10: AES-GCM encrypted with a key which is encrypted with DPAPI - not v10: encrypted with DPAPI Sources: - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/ - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc - KeyStorageLinux::CreateService """ _cookie_counts = {} def decrypt(self, encrypted_value): raise NotImplementedError('Must be implemented by sub classes') def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None): if sys.platform == 'darwin': return MacChromeCookieDecryptor(browser_keyring_name, logger) elif sys.platform in ('win32', 'cygwin'): return WindowsChromeCookieDecryptor(browser_root, logger) return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring) class LinuxChromeCookieDecryptor(ChromeCookieDecryptor): def __init__(self, browser_keyring_name, logger, *, keyring=None): self._logger = logger self._v10_key = self.derive_key(b'peanuts') self._empty_key = self.derive_key(b'') self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0} self._browser_keyring_name = browser_keyring_name self._keyring = keyring @functools.cached_property def _v11_key(self): password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger) return None if password is None else self.derive_key(password) @staticmethod def derive_key(password): # values from # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16) def decrypt(self, encrypted_value): """ following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt with an empty password. 
The failure detection is not the same as what chromium uses so the results won't be perfect References: - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/ - a bugfix to try an empty password as a fallback """ version = encrypted_value[:3] ciphertext = encrypted_value[3:] if version == b'v10': self._cookie_counts['v10'] += 1 return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger) elif version == b'v11': self._cookie_counts['v11'] += 1 if self._v11_key is None: self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True) return None return _decrypt_aes_cbc_multi(ciphertext, (self._v11_key, self._empty_key), self._logger) else: self._logger.warning(f'unknown cookie version: "{version}"', only_once=True) self._cookie_counts['other'] += 1 return None class MacChromeCookieDecryptor(ChromeCookieDecryptor): def __init__(self, browser_keyring_name, logger): self._logger = logger password = _get_mac_keyring_password(browser_keyring_name, logger) self._v10_key = None if password is None else self.derive_key(password) self._cookie_counts = {'v10': 0, 'other': 0} @staticmethod def derive_key(password): # values from # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16) def decrypt(self, encrypted_value): version = encrypted_value[:3] ciphertext = encrypted_value[3:] if version == b'v10': self._cookie_counts['v10'] += 1 if self._v10_key is None: self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) return None return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger) else: self._cookie_counts['other'] += 1 # other prefixes are considered 'old data' which were stored as plaintext # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm return encrypted_value class WindowsChromeCookieDecryptor(ChromeCookieDecryptor): def __init__(self, browser_root, logger): self._logger = logger self._v10_key = _get_windows_v10_key(browser_root, logger) self._cookie_counts = {'v10': 0, 'other': 0} def decrypt(self, encrypted_value): version = encrypted_value[:3] ciphertext = encrypted_value[3:] if version == b'v10': self._cookie_counts['v10'] += 1 if self._v10_key is None: self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True) return None # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc # kNonceLength nonce_length = 96 // 8 # boringssl # EVP_AEAD_AES_GCM_TAG_LEN authentication_tag_length = 16 raw_ciphertext = ciphertext nonce = raw_ciphertext[:nonce_length] ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length] authentication_tag = raw_ciphertext[-authentication_tag_length:] return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger) else: self._cookie_counts['other'] += 1 # any other prefix means the data is DPAPI encrypted # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc return _decrypt_windows_dpapi(encrypted_value, self._logger).decode() def _extract_safari_cookies(profile, logger): if sys.platform != 'darwin': raise ValueError(f'unsupported platform: {sys.platform}') if profile: cookies_path = os.path.expanduser(profile) if not os.path.isfile(cookies_path): raise FileNotFoundError('custom safari 
cookies database not found')
    else:
        cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')

        if not os.path.isfile(cookies_path):
            logger.debug('Trying secondary cookie location')
            cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
            if not os.path.isfile(cookies_path):
                raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as f:
        cookies_data = f.read()

    jar = parse_safari_cookies(cookies_data, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar


class ParserError(Exception):
    pass


class DataParser:
    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        end = self.cursor + num_bytes
        if end > len(self._data):
            raise ParserError('reached end of input')
        data = self._data[self.cursor:end]
        self.cursor = end
        return data

    def expect_bytes(self, expected_value, message):
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        data_format = '>I' if big_endian else '<I'
        return struct.unpack(data_format, self.read_bytes(4))[0]

    def read_double(self, big_endian=False):
        data_format = '>d' if big_endian else '<d'
        return struct.unpack(data_format, self.read_bytes(8))[0]

    def read_cstring(self):
        buffer = []
        while True:
            c = self.read_bytes(1)
            if c == b'\x00':
                return b''.join(buffer).decode()
            buffer.append(c)

    def skip(self, num_bytes, description='unknown'):
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')
        elif num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')

    def skip_to(self, offset, description='unknown'):
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        self.skip_to(len(self._data), description)
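# Usage sketch (illustrative, not part of upstream yt-dlp): DataParser walks a
# binary buffer with an explicit cursor, which is how the Safari
# Cookies.binarycookies records below are decoded. The buffer layout and the
# logger stub here are made up for the demonstration.
if __name__ == '__main__':
    class _StubLogger:
        def debug(self, message):
            pass

    # magic + little-endian uint32 + NUL-terminated string + big-endian uint32
    _buf = b'cook' + struct.pack('<I', 7) + b'yt-dlp\x00' + struct.pack('>I', 42)
    _p = DataParser(_buf, _StubLogger())
    _p.expect_bytes(b'cook', 'demo signature')
    assert _p.read_uint() == 7
    assert _p.read_cstring() == 'yt-dlp'
    assert _p.read_uint(big_endian=True) == 42
    _p.skip_to_end('demo trailer')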
def _mac_absolute_time_to_posix(timestamp):
    return int((dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc) + dt.timedelta(seconds=timestamp)).timestamp())


def _parse_safari_cookies_header(data, logger):
    p = DataParser(data, logger)
    p.expect_bytes(b'cook', 'database signature')
    number_of_pages = p.read_uint(big_endian=True)
    page_sizes = [p.read_uint(big_endian=True) for _ in range(number_of_pages)]
    return page_sizes, p.cursor


def _parse_safari_cookies_page(data, jar, logger):
    p = DataParser(data, logger)
    p.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = p.read_uint()
    record_offsets = [p.read_uint() for _ in range(number_of_cookies)]
    if number_of_cookies == 0:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    p.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for i, record_offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {i: 6d}/{number_of_cookies: 6d}')
            p.skip_to(record_offset, 'space between records')
            record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
            p.read_bytes(record_length)
    p.skip_to_end('space in between pages')


def _parse_safari_cookies_record(data, jar, logger):
    p = DataParser(data, logger)
    record_size = p.read_uint()
    p.skip(4, 'unknown record field 1')
    flags = p.read_uint()
    is_secure = bool(flags & 0x0001)
    p.skip(4, 'unknown record field 2')
    domain_offset = p.read_uint()
    name_offset = p.read_uint()
    path_offset = p.read_uint()
    value_offset = p.read_uint()
    p.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(p.read_double())
    _creation_date = _mac_absolute_time_to_posix(p.read_double())  # noqa: F841

    try:
        p.skip_to(domain_offset)
        domain = p.read_cstring()

        p.skip_to(name_offset)
        name = p.read_cstring()

        p.skip_to(path_offset)
        path = p.read_cstring()

        p.skip_to(value_offset)
        value = p.read_cstring()
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    p.skip_to(record_size, 'space at the end of the record')

    cookie = http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={})
    jar.set_cookie(cookie)
    return record_size


def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure are the same
            - there are a few bytes here and there which are skipped during parsing
    """
    if jar is None:
        jar = YoutubeDLCookieJar()
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    p = DataParser(data[body_start:], logger)
    for page_size in page_sizes:
        _parse_safari_cookies_page(p.read_bytes(page_size), jar, logger)
    p.skip_to_end('footer')
    return jar


class _LinuxDesktopEnvironment(Enum):
    """
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    OTHER = auto()
    CINNAMON = auto()
    DEEPIN = auto()
    GNOME = auto()
    KDE3 = auto()
    KDE4 = auto()
    KDE5 = auto()
    KDE6 = auto()
    PANTHEON = auto()
    UKUI = auto()
    UNITY = auto()
    XFCE = auto()
    LXQT = auto()


class _LinuxKeyring(Enum):
    """
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()  # KDE4
    KWALLET5 = auto()
    KWALLET6 = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()


SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()


def _get_linux_desktop_environment(env, logger):
    """
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)
    if xdg_current_desktop is not None:
        for part in map(str.strip, xdg_current_desktop.split(':')):
            if part == 'Unity':
                if desktop_session is not None and 'gnome-fallback' in desktop_session:
                    return _LinuxDesktopEnvironment.GNOME
                else:
                    return _LinuxDesktopEnvironment.UNITY
            elif part == 'Deepin':
                return _LinuxDesktopEnvironment.DEEPIN
            elif part == 'GNOME':
                return _LinuxDesktopEnvironment.GNOME
            elif part == 'X-Cinnamon':
                return _LinuxDesktopEnvironment.CINNAMON
            elif part == 'KDE':
                kde_version = env.get('KDE_SESSION_VERSION', None)
                if kde_version == '5':
                    return _LinuxDesktopEnvironment.KDE5
                elif kde_version == '6':
                    return _LinuxDesktopEnvironment.KDE6
                elif kde_version == '4':
                    return _LinuxDesktopEnvironment.KDE4
                else:
                    logger.info(f'unknown KDE version: "{kde_version}".
Assuming KDE4') return _LinuxDesktopEnvironment.KDE4 elif part == 'Pantheon': return _LinuxDesktopEnvironment.PANTHEON elif part == 'XFCE': return _LinuxDesktopEnvironment.XFCE elif part == 'UKUI': return _LinuxDesktopEnvironment.UKUI elif part == 'LXQt': return _LinuxDesktopEnvironment.LXQT logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"') elif desktop_session is not None: if desktop_session == 'deepin': return _LinuxDesktopEnvironment.DEEPIN elif desktop_session in ('mate', 'gnome'): return _LinuxDesktopEnvironment.GNOME elif desktop_session in ('kde4', 'kde-plasma'): return _LinuxDesktopEnvironment.KDE4 elif desktop_session == 'kde': if 'KDE_SESSION_VERSION' in env: return _LinuxDesktopEnvironment.KDE4 else: return _LinuxDesktopEnvironment.KDE3 elif 'xfce' in desktop_session or desktop_session == 'xubuntu': return _LinuxDesktopEnvironment.XFCE elif desktop_session == 'ukui': return _LinuxDesktopEnvironment.UKUI else: logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"') else: if 'GNOME_DESKTOP_SESSION_ID' in env: return _LinuxDesktopEnvironment.GNOME elif 'KDE_FULL_SESSION' in env: if 'KDE_SESSION_VERSION' in env: return _LinuxDesktopEnvironment.KDE4 else: return _LinuxDesktopEnvironment.KDE3 return _LinuxDesktopEnvironment.OTHER def _choose_linux_keyring(logger): """ SelectBackend in [1] There is currently support for forcing chromium to use BASIC_TEXT by creating a file called `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1]) does not appear to be called anywhere other than in tests, so the user would have to create this file manually and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring. References: - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc """ desktop_environment = _get_linux_desktop_environment(os.environ, logger) logger.debug(f'detected desktop environment: {desktop_environment.name}') if desktop_environment == _LinuxDesktopEnvironment.KDE4: linux_keyring = _LinuxKeyring.KWALLET elif desktop_environment == _LinuxDesktopEnvironment.KDE5: linux_keyring = _LinuxKeyring.KWALLET5 elif desktop_environment == _LinuxDesktopEnvironment.KDE6: linux_keyring = _LinuxKeyring.KWALLET6 elif desktop_environment in ( _LinuxDesktopEnvironment.KDE3, _LinuxDesktopEnvironment.LXQT, _LinuxDesktopEnvironment.OTHER, ): linux_keyring = _LinuxKeyring.BASICTEXT else: linux_keyring = _LinuxKeyring.GNOMEKEYRING return linux_keyring def _get_kwallet_network_wallet(keyring, logger): """ The name of the wallet used to store network passwords. 
    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    try:
        if keyring == _LinuxKeyring.KWALLET:
            service_name = 'org.kde.kwalletd'
            wallet_path = '/modules/kwalletd'
        elif keyring == _LinuxKeyring.KWALLET5:
            service_name = 'org.kde.kwalletd5'
            wallet_path = '/modules/kwalletd5'
        elif keyring == _LinuxKeyring.KWALLET6:
            service_name = 'org.kde.kwalletd6'
            wallet_path = '/modules/kwalletd6'
        else:
            raise ValueError(keyring)

        stdout, _, returncode = Popen.run([
            'dbus-send', '--session', '--print-reply=literal',
            f'--dest={service_name}',
            wallet_path,
            'org.kde.KWallet.networkWallet',
        ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if returncode:
            logger.warning('failed to read NetworkWallet')
            return default_wallet
        else:
            logger.debug(f'NetworkWallet = "{stdout.strip()}"')
            return stdout.strip()
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet


def _get_kwallet_password(browser_keyring_name, keyring, logger):
    logger.debug(f'using kwallet-query to obtain password from {keyring.name}')

    if shutil.which('kwallet-query') is None:
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(keyring, logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet,
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this was identified as a bug later and fixed in
                # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
                # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
                return b''
            else:
                logger.debug('password found')
                return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
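# Usage sketch (illustrative, not part of upstream yt-dlp): the keyring backend
# used by Chromium is inferred from the desktop environment, which in turn is
# read from environment variables (see _get_linux_desktop_environment above).
# The fake env dicts below are made-up examples.
if __name__ == '__main__':
    class _StubLogger:
        def debug(self, message):
            pass

        def info(self, message):
            pass

    _log = _StubLogger()
    assert _get_linux_desktop_environment(
        {'XDG_CURRENT_DESKTOP': 'KDE', 'KDE_SESSION_VERSION': '5'}, _log) == _LinuxDesktopEnvironment.KDE5
    # unrecognized parts of a colon-separated XDG_CURRENT_DESKTOP are skipped
    assert _get_linux_desktop_environment(
        {'XDG_CURRENT_DESKTOP': 'ubuntu:GNOME'}, _log) == _LinuxDesktopEnvironment.GNOME
    assert _get_linux_desktop_environment({}, _log) == _LinuxDesktopEnvironment.OTHER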
def _get_gnome_keyring_password(browser_keyring_name, logger):
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as con:
        col = secretstorage.get_default_collection(con)
        for item in col.get_all_items():
            if item.get_label() == f'{browser_keyring_name} Safe Storage':
                return item.get_secret()
        logger.error('failed to read from keyring')
        return b''


def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
        return _get_kwallet_password(browser_keyring_name, keyring, logger)
    elif keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {keyring}'


def _get_mac_keyring_password(browser_keyring_name, logger):
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        stdout, _, returncode = Popen.run(
            ['security', 'find-generic-password',
             '-w',  # write password to stdout
             '-a', browser_keyring_name,  # match 'account'
             '-s', f'{browser_keyring_name} Safe Storage'],  # match 'service'
            stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if returncode:
            logger.warning('find-generic-password failed')
            return None
        return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None


def _get_windows_v10_key(browser_root, logger):
    """
    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
    """
    path = _newest(_find_files(browser_root, 'Local State', logger))
    if path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{path}"')
    with open(path, encoding='utf8') as f:
        data = json.load(f)
    try:
        # kOsCryptEncryptedKeyPrefName in [1]
        base64_key = data['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None
    encrypted_key = base64.b64decode(base64_key)
    # kDPAPIKeyPrefix in [1]
    prefix = b'DPAPI'
    if not encrypted_key.startswith(prefix):
        logger.error('invalid key')
        return None
    return _decrypt_windows_dpapi(encrypted_key[len(prefix):], logger)


def pbkdf2_sha1(password, salt, iterations, key_length):
    return hashlib.pbkdf2_hmac('sha1', password, salt, iterations, key_length)


def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
    for key in keys:
        plaintext = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, key, initialization_vector))
        try:
            return plaintext.decode()
        except UnicodeDecodeError:
            pass
    logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
    return None
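# Usage sketch (illustrative, not part of upstream yt-dlp): on Linux, v10
# cookie values are sealed with a key derived from the hard-coded password
# 'peanuts' (see LinuxChromeCookieDecryptor.derive_key above). The plaintext
# below is an arbitrary demo value, and aes_cbc_encrypt_bytes merely stands in
# for the browser's encryption step; it is assumed to PKCS#7-pad by default.
if __name__ == '__main__':
    from yt_dlp.aes import aes_cbc_encrypt_bytes

    class _StubLogger:
        def warning(self, message, only_once=False):
            pass

    _v10_key = pbkdf2_sha1(b'peanuts', salt=b'saltysalt', iterations=1, key_length=16)
    _sealed = aes_cbc_encrypt_bytes(b'demo-cookie-value', _v10_key, b' ' * 16)
    # _decrypt_aes_cbc_multi tries each candidate key until the result decodes as UTF-8
    assert _decrypt_aes_cbc_multi(_sealed, (_v10_key,), _StubLogger()) == 'demo-cookie-value'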
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    try:
        plaintext = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    try:
        return plaintext.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None


def _decrypt_windows_dpapi(ciphertext, logger):
    """
    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    buffer = ctypes.create_string_buffer(ciphertext)
    blob_in = DATA_BLOB(ctypes.sizeof(buffer), buffer)
    blob_out = DATA_BLOB()
    ret = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(blob_in),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(blob_out),  # pDataOut
    )
    if not ret:
        message = 'Failed to decrypt with DPAPI. See https://github.com/yt-dlp/yt-dlp/issues/10927 for more info'
        logger.error(message)
        raise DownloadError(message)  # force exit

    result = ctypes.string_at(blob_out.pbData, blob_out.cbData)
    ctypes.windll.kernel32.LocalFree(blob_out.pbData)
    return result


def _config_home():
    return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))


def _open_database_copy(database_path, tmpdir):
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, database_copy_path)
    conn = sqlite3.connect(database_copy_path)
    return conn.cursor()


def _get_column_names(cursor, table_name):
    table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    return [row[1].decode() for row in table_info]


def _newest(files):
    return max(files, key=lambda path: os.lstat(path).st_mtime, default=None)


def _find_files(root, filename, logger):
    # if there are multiple browser profiles, take the most recently used one
    i = 0
    with _create_progress_bar(logger) as progress_bar:
        for curr_root, _, files in os.walk(root):
            for file in files:
                i += 1
                progress_bar.print(f'Searching for "{filename}": {i: 6d} files searched')
                if file == filename:
                    yield os.path.join(curr_root, file)


def _merge_cookie_jars(jars):
    output_jar = YoutubeDLCookieJar()
    for jar in jars:
        for cookie in jar:
            output_jar.set_cookie(cookie)
        if jar.filename is not None:
            output_jar.filename = jar.filename
    return output_jar


def _is_path(value):
    return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)


def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')
    if keyring not in (None, *SUPPORTED_KEYRINGS):
        raise ValueError(f'unsupported keyring: "{keyring}"')
    if profile is not None and _is_path(expand_path(profile)):
        profile = expand_path(profile)
    return browser_name, profile, keyring, container


class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie"""
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # We use Morsel's legal key chars to avoid errors on setting values
    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')

    _RESERVED = {
        'expires',
        'path',
        'comment',
        'domain',
        'max-age',
        'secure',
        'httponly',
        'version', 'samesite',
    }

    _FLAGS = {'secure', 'httponly'}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r'''
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                        # Equal Sign
        (                              # Start of potential value
        (?P<val>                       # Start of group 'val'
        "(?:[^\\"]|\\.)*"              # Any doublequoted string
        |                              # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT  # Special case for "expires" attr
        |                              # or
        [''' + _LEGAL_VALUE_CHARS + r''']*  # Any word or empty string
        )                              # End of group 'val'
        |                              # or
        (?P<bad>(?:\\;|[^;])*?)        # 'bad' group fallback for invalid values
        )                              # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        ''', re.ASCII | re.VERBOSE)

    def load(self, data):
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        morsel = None
        for match in self._COOKIE_PATTERN.finditer(data):
            if match.group('bad'):
                morsel = None
                continue

            key, value = match.group('key', 'val')

            is_attribute = False
            if key.startswith('$'):
                key = key[1:]
                is_attribute = True

            lower_key = key.lower()
            if lower_key in self._RESERVED:
                if morsel is None:
                    continue

                if value is None:
                    if lower_key not in self._FLAGS:
                        morsel = None
                        continue
                    value = True
                else:
                    value, _ = self.value_decode(value)

                morsel[key] = value

            elif is_attribute:
                morsel = None

            elif value is not None:
                morsel = self.get(key, http.cookies.Morsel())
                real_value, coded_value = self.value_decode(value)
                morsel.set(key, real_value, coded_value)
                self[key] = morsel

            else:
                morsel = None


class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard, ignore_expires):
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('{}\n'.format('\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value,
            ))))

    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}')
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}')
            return line

        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by the `expires` field being set to
        # either an empty string or 0. MozillaCookieJar only recognizes the
        # former (see [1]), so we need to force the latter to be recognized
        # as session cookies on our own.
        # Session cookies may be important for cookie-based authentication:
        # usually, when the user does not tick the 'Remember me' check box
        # while logging in on a site, some important cookies are stored as
        # session cookies, so failing to recognize them results in a failed
        # login.
        # 1.
https://bugs.python.org/issue17164 for cookie in self: # Treat `expires=0` cookies as session cookies if cookie.expires == 0: cookie.expires = None cookie.discard = True def get_cookie_header(self, url): """Generate a Cookie HTTP header for a given url""" cookie_req = urllib.request.Request(normalize_url(sanitize_url(url))) self.add_cookie_header(cookie_req) return cookie_req.get_header('Cookie') def get_cookies_for_url(self, url): """Generate a list of Cookie objects for a given url""" # Policy `_now` attribute must be set before calling `_cookies_for_request` # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360 self._policy._now = self._now = int(time.time()) return self._cookies_for_request(urllib.request.Request(normalize_url(sanitize_url(url)))) def clear(self, *args, **kwargs): with contextlib.suppress(KeyError): return super().clear(*args, **kwargs) yt-dlp-2024.09.27/yt_dlp/dependencies/000077500000000000000000000000001467563447100172745ustar00rootroot00000000000000yt-dlp-2024.09.27/yt_dlp/dependencies/Cryptodome.py000066400000000000000000000026041467563447100217750ustar00rootroot00000000000000from ..compat.compat_utils import passthrough_module try: import Cryptodome as _parent except ImportError: try: import Crypto as _parent except (ImportError, SyntaxError): # Old Crypto gives SyntaxError in newer Python _parent = passthrough_module(__name__, 'no_Cryptodome') __bool__ = lambda: False del passthrough_module __version__ = '' AES = PKCS1_v1_5 = Blowfish = PKCS1_OAEP = SHA1 = CMAC = RSA = None try: if _parent.__name__ == 'Cryptodome': from Cryptodome import __version__ from Cryptodome.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 from Cryptodome.Hash import CMAC, SHA1 from Cryptodome.PublicKey import RSA elif _parent.__name__ == 'Crypto': from Crypto import __version__ from Crypto.Cipher import AES, PKCS1_OAEP, Blowfish, PKCS1_v1_5 # noqa: F401 from Crypto.Hash import CMAC, SHA1 # noqa: F401 from Crypto.PublicKey import RSA # noqa: F401 except ImportError: __version__ = f'broken {__version__}'.strip() _yt_dlp__identifier = _parent.__name__ if AES and _yt_dlp__identifier == 'Crypto': try: # In pycrypto, mode defaults to ECB. See: # https://www.pycryptodome.org/en/latest/src/vs_pycrypto.html#:~:text=not%20have%20ECB%20as%20default%20mode AES.new(b'abcdefghijklmnop') except TypeError: _yt_dlp__identifier = 'pycrypto' yt-dlp-2024.09.27/yt_dlp/dependencies/__init__.py000066400000000000000000000042361467563447100214120ustar00rootroot00000000000000# flake8: noqa: F401 """Imports all optional dependencies for the project. An attribute "_yt_dlp__identifier" may be inserted into the module if it uses an ambiguous namespace""" try: import brotlicffi as brotli except ImportError: try: import brotli except ImportError: brotli = None try: import certifi except ImportError: certifi = None else: from os.path import exists as _path_exists # The certificate may not be bundled in executable if not _path_exists(certifi.where()): certifi = None try: import mutagen except ImportError: mutagen = None secretstorage = None try: import secretstorage _SECRETSTORAGE_UNAVAILABLE_REASON = None except ImportError: _SECRETSTORAGE_UNAVAILABLE_REASON = ( 'as the `secretstorage` module is not installed. ' 'Please install by running `python3 -m pip install secretstorage`') except Exception as _err: _SECRETSTORAGE_UNAVAILABLE_REASON = f'as the `secretstorage` module could not be initialized. 
{_err}' try: import sqlite3 # We need to get the underlying `sqlite` version, see https://github.com/yt-dlp/yt-dlp/issues/8152 sqlite3._yt_dlp__version = sqlite3.sqlite_version except ImportError: # although sqlite3 is part of the standard library, it is possible to compile Python without # sqlite support. See: https://github.com/yt-dlp/yt-dlp/issues/544 sqlite3 = None try: import websockets except ImportError: websockets = None try: import urllib3 except ImportError: urllib3 = None try: import requests except ImportError: requests = None try: import xattr # xattr or pyxattr except ImportError: xattr = None else: if hasattr(xattr, 'set'): # pyxattr xattr._yt_dlp__identifier = 'pyxattr' try: import curl_cffi except ImportError: curl_cffi = None from . import Cryptodome all_dependencies = {k: v for k, v in globals().items() if not k.startswith('_')} available_dependencies = {k: v for k, v in all_dependencies.items() if v} # Deprecated Cryptodome_AES = Cryptodome.AES __all__ = [ 'all_dependencies', 'available_dependencies', *all_dependencies.keys(), ] yt-dlp-2024.09.27/yt_dlp/downloader/000077500000000000000000000000001467563447100170045ustar00rootroot00000000000000yt-dlp-2024.09.27/yt_dlp/downloader/__init__.py000066400000000000000000000106211467563447100211150ustar00rootroot00000000000000from ..utils import NO_DEFAULT, determine_protocol def get_suitable_downloader(info_dict, params={}, default=NO_DEFAULT, protocol=None, to_stdout=False): info_dict['protocol'] = determine_protocol(info_dict) info_copy = info_dict.copy() info_copy['to_stdout'] = to_stdout protocols = (protocol or info_copy['protocol']).split('+') downloaders = [_get_suitable_downloader(info_copy, proto, params, default) for proto in protocols] if set(downloaders) == {FFmpegFD} and FFmpegFD.can_merge_formats(info_copy, params): return FFmpegFD elif (set(downloaders) == {DashSegmentsFD} and not (to_stdout and len(protocols) > 1) and set(protocols) == {'http_dash_segments_generator'}): return DashSegmentsFD elif len(downloaders) == 1: return downloaders[0] return None # Some of these require get_suitable_downloader from .common import FileDownloader from .dash import DashSegmentsFD from .external import FFmpegFD, get_external_downloader from .f4m import F4mFD from .fc2 import FC2LiveFD from .hls import HlsFD from .http import HttpFD from .ism import IsmFD from .mhtml import MhtmlFD from .niconico import NiconicoDmcFD, NiconicoLiveFD from .rtmp import RtmpFD from .rtsp import RtspFD from .websocket import WebSocketFragmentFD from .youtube_live_chat import YoutubeLiveChatFD PROTOCOL_MAP = { 'rtmp': RtmpFD, 'rtmpe': RtmpFD, 'rtmp_ffmpeg': FFmpegFD, 'm3u8_native': HlsFD, 'm3u8': FFmpegFD, 'mms': RtspFD, 'rtsp': RtspFD, 'f4m': F4mFD, 'http_dash_segments': DashSegmentsFD, 'http_dash_segments_generator': DashSegmentsFD, 'ism': IsmFD, 'mhtml': MhtmlFD, 'niconico_dmc': NiconicoDmcFD, 'niconico_live': NiconicoLiveFD, 'fc2_live': FC2LiveFD, 'websocket_frag': WebSocketFragmentFD, 'youtube_live_chat': YoutubeLiveChatFD, 'youtube_live_chat_replay': YoutubeLiveChatFD, } def shorten_protocol_name(proto, simplify=False): short_protocol_names = { 'm3u8_native': 'm3u8', 'm3u8': 'm3u8F', 'rtmp_ffmpeg': 'rtmpF', 'http_dash_segments': 'dash', 'http_dash_segments_generator': 'dashG', 'niconico_dmc': 'dmc', 'websocket_frag': 'WSfrag', } if simplify: short_protocol_names.update({ 'https': 'http', 'ftps': 'ftp', 'm3u8': 'm3u8', # Reverse above m3u8 mapping 'm3u8_native': 'm3u8', 'http_dash_segments_generator': 'dash', 'rtmp_ffmpeg': 'rtmp', 
'm3u8_frag_urls': 'm3u8', 'dash_frag_urls': 'dash', }) return short_protocol_names.get(proto, proto) def _get_suitable_downloader(info_dict, protocol, params, default): """Get the downloader class that can handle the info dict.""" if default is NO_DEFAULT: default = HttpFD if (info_dict.get('section_start') or info_dict.get('section_end')) and FFmpegFD.can_download(info_dict): return FFmpegFD info_dict['protocol'] = protocol downloaders = params.get('external_downloader') external_downloader = ( downloaders if isinstance(downloaders, str) or downloaders is None else downloaders.get(shorten_protocol_name(protocol, True), downloaders.get('default'))) if external_downloader is None: if info_dict['to_stdout'] and FFmpegFD.can_merge_formats(info_dict, params): return FFmpegFD elif external_downloader.lower() != 'native': ed = get_external_downloader(external_downloader) if ed.can_download(info_dict, external_downloader): return ed if protocol == 'http_dash_segments': if info_dict.get('is_live') and (external_downloader or '').lower() != 'native': return FFmpegFD if protocol in ('m3u8', 'm3u8_native'): if info_dict.get('is_live'): return FFmpegFD elif (external_downloader or '').lower() == 'native': return HlsFD elif protocol == 'm3u8_native' and get_suitable_downloader( info_dict, params, None, protocol='m3u8_frag_urls', to_stdout=info_dict['to_stdout']): return HlsFD elif params.get('hls_prefer_native') is True: return HlsFD elif params.get('hls_prefer_native') is False: return FFmpegFD return PROTOCOL_MAP.get(protocol, default) __all__ = [ 'FileDownloader', 'get_suitable_downloader', 'shorten_protocol_name', ] yt-dlp-2024.09.27/yt_dlp/downloader/common.py000066400000000000000000000463511467563447100206570ustar00rootroot00000000000000import contextlib import errno import functools import os import random import re import threading import time from ..minicurses import ( BreaklineStatusPrinter, MultilineLogger, MultilinePrinter, QuietMultilinePrinter, ) from ..utils import ( IDENTITY, NO_DEFAULT, LockingUnsupportedError, Namespace, RetryManager, classproperty, decodeArgument, deprecation_warning, encodeFilename, format_bytes, join_nonempty, parse_bytes, remove_start, sanitize_open, shell_quote, timeconvert, timetuple_from_msec, try_call, ) class FileDownloader: """File Downloader class. File downloader objects are the ones responsible of downloading the actual video file and writing it to disk. File downloaders accept a lot of parameters. In order not to saturate the object constructor with arguments, it receives a dictionary of options instead. Available options: verbose: Print additional info to stdout. quiet: Do not print messages to stdout. ratelimit: Download speed limit, in bytes/sec. throttledratelimit: Assume the download is being throttled below this speed (bytes/sec) retries: Number of times to retry for expected network errors. Default is 0 for API, but 10 for CLI file_access_retries: Number of times to retry on file access error (default: 3) buffersize: Size of download buffer in bytes. noresizebuffer: Do not automatically resize the download buffer. continuedl: Try to continue downloads if possible. noprogress: Do not print the progress bar. nopart: Do not use temporary .part files. updatetime: Use the Last-modified header to set output file timestamps. test: Download only first bytes to test the downloader. min_filesize: Skip files smaller than this size max_filesize: Skip files larger than this size xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. 
progress_delta: The minimum time between progress output, in seconds external_downloader_args: A dictionary of downloader keys (in lower case) and a list of additional command-line arguments for the executable. Use 'default' as the name for arguments to be passed to all downloaders. For compatibility with youtube-dl, a single list of args can also be used hls_use_mpegts: Use the mpegts container for HLS videos. http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be useful for bypassing bandwidth throttling imposed by a webserver (experimental) progress_template: See YoutubeDL.py retry_sleep_functions: See YoutubeDL.py Subclasses of this one must re-define the real_download method. """ _TEST_FILE_SIZE = 10241 params = None def __init__(self, ydl, params): """Create a FileDownloader object with the given options.""" self._set_ydl(ydl) self._progress_hooks = [] self.params = params self._prepare_multiline_status() self.add_progress_hook(self.report_progress) if self.params.get('progress_delta'): self._progress_delta_lock = threading.Lock() self._progress_delta_time = time.monotonic() def _set_ydl(self, ydl): self.ydl = ydl for func in ( 'deprecation_warning', 'deprecated_feature', 'report_error', 'report_file_already_downloaded', 'report_warning', 'to_console_title', 'to_stderr', 'trouble', 'write_debug', ): if not hasattr(self, func): setattr(self, func, getattr(ydl, func)) def to_screen(self, *args, **kargs): self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs) __to_screen = to_screen @classproperty def FD_NAME(cls): return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', cls.__name__[:-2]).lower() @staticmethod def format_seconds(seconds): if seconds is None: return ' Unknown' time = timetuple_from_msec(seconds * 1000) if time.hours > 99: return '--:--:--' return '%02d:%02d:%02d' % time[:-1] @classmethod def format_eta(cls, seconds): return f'{remove_start(cls.format_seconds(seconds), "00:"):>8s}' @staticmethod def calc_percent(byte_counter, data_len): if data_len is None: return None return float(byte_counter) / float(data_len) * 100.0 @staticmethod def format_percent(percent): return ' N/A%' if percent is None else f'{percent:>5.1f}%' @classmethod def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT): if total is NO_DEFAULT: rate, remaining = start_or_rate, now_or_remaining if None in (rate, remaining): return None return int(float(remaining) / rate) start, now = start_or_rate, now_or_remaining if total is None: return None if now is None: now = time.time() rate = cls.calc_speed(start, now, current) return rate and int((float(total) - float(current)) / rate) @staticmethod def calc_speed(start, now, bytes): dif = now - start if bytes == 0 or dif < 0.001: # One millisecond return None return float(bytes) / dif @staticmethod def format_speed(speed): return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s' @staticmethod def format_retries(retries): return 'inf' if retries == float('inf') else int(retries) @staticmethod def filesize_or_none(unencoded_filename): if os.path.isfile(unencoded_filename): return os.path.getsize(unencoded_filename) return 0 @staticmethod def best_block_size(elapsed_time, bytes): new_min = max(bytes / 2.0, 1.0) new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB if elapsed_time < 0.001: return int(new_max) rate = bytes / elapsed_time if rate > new_max: return int(new_max) if rate < new_min: return int(new_min) return int(rate) @staticmethod def parse_bytes(bytestr): """Parse a 
string indicating a byte quantity into an integer.""" deprecation_warning('yt_dlp.FileDownloader.parse_bytes is deprecated and ' 'may be removed in the future. Use yt_dlp.utils.parse_bytes instead') return parse_bytes(bytestr) def slow_down(self, start_time, now, byte_counter): """Sleep if the download speed is over the rate limit.""" rate_limit = self.params.get('ratelimit') if rate_limit is None or byte_counter == 0: return if now is None: now = time.time() elapsed = now - start_time if elapsed <= 0.0: return speed = float(byte_counter) / elapsed if speed > rate_limit: sleep_time = float(byte_counter) / rate_limit - elapsed if sleep_time > 0: time.sleep(sleep_time) def temp_name(self, filename): """Returns a temporary filename for the given filename.""" if self.params.get('nopart', False) or filename == '-' or \ (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))): return filename return filename + '.part' def undo_temp_name(self, filename): if filename.endswith('.part'): return filename[:-len('.part')] return filename def ytdl_filename(self, filename): return filename + '.ytdl' def wrap_file_access(action, *, fatal=False): def error_callback(err, count, retries, *, fd): return RetryManager.report_retry( err, count, retries, info=fd.__to_screen, warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')), error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'), sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access')) def wrapper(self, func, *args, **kwargs): for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self): try: return func(self, *args, **kwargs) except OSError as err: if err.errno in (errno.EACCES, errno.EINVAL): retry.error = err continue retry.error_callback(err, 1, 0) return functools.partial(functools.partialmethod, wrapper) @wrap_file_access('open', fatal=True) def sanitize_open(self, filename, open_mode): f, filename = sanitize_open(filename, open_mode) if not getattr(f, 'locked', None): self.write_debug(f'{LockingUnsupportedError.msg}. 
Proceeding without locking', only_once=True) return f, filename @wrap_file_access('remove') def try_remove(self, filename): if os.path.isfile(filename): os.remove(filename) @wrap_file_access('rename') def try_rename(self, old_filename, new_filename): if old_filename == new_filename: return os.replace(old_filename, new_filename) def try_utime(self, filename, last_modified_hdr): """Try to set the last-modified time of the given file.""" if last_modified_hdr is None: return if not os.path.isfile(encodeFilename(filename)): return timestr = last_modified_hdr if timestr is None: return filetime = timeconvert(timestr) if filetime is None: return filetime # Ignore obviously invalid dates if filetime == 0: return with contextlib.suppress(Exception): os.utime(filename, (time.time(), filetime)) return filetime def report_destination(self, filename): """Report destination filename.""" self.to_screen('[download] Destination: ' + filename) def _prepare_multiline_status(self, lines=1): if self.params.get('noprogress'): self._multiline = QuietMultilinePrinter() elif self.ydl.params.get('logger'): self._multiline = MultilineLogger(self.ydl.params['logger'], lines) elif self.params.get('progress_with_newline'): self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines) else: self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet')) self._multiline.allow_colors = self.ydl._allow_colors.out and self.ydl._allow_colors.out != 'no_color' self._multiline._HAVE_FULLCAP = self.ydl._allow_colors.out def _finish_multiline_status(self): self._multiline.end() ProgressStyles = Namespace( downloaded_bytes='light blue', percent='light blue', eta='yellow', speed='green', elapsed='bold white', total_bytes='', total_bytes_estimate='', ) def _report_progress_status(self, s, default_template): for name, style in self.ProgressStyles.items_: name = f'_{name}_str' if name not in s: continue s[name] = self._format_progress(s[name], style) s['_default_template'] = default_template % s progress_dict = s.copy() progress_dict.pop('info_dict') progress_dict = {'info': s['info_dict'], 'progress': progress_dict} progress_template = self.params.get('progress_template', {}) self._multiline.print_at_line(self.ydl.evaluate_outtmpl( progress_template.get('download') or '[download] %(progress._default_template)s', progress_dict), s.get('progress_idx') or 0) self.to_console_title(self.ydl.evaluate_outtmpl( progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s', progress_dict)) def _format_progress(self, *args, **kwargs): return self.ydl._format_text( self._multiline.stream, self._multiline.allow_colors, *args, **kwargs) def report_progress(self, s): def with_fields(*tups, default=''): for *fields, tmpl in tups: if all(s.get(f) is not None for f in fields): return tmpl return default _format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}' if s['status'] == 'finished': if self.params.get('noprogress'): self.to_screen('[download] Download completed') speed = try_call(lambda: s['total_bytes'] / s['elapsed']) s.update({ 'speed': speed, '_speed_str': self.format_speed(speed).strip(), '_total_bytes_str': _format_bytes('total_bytes'), '_elapsed_str': self.format_seconds(s.get('elapsed')), '_percent_str': self.format_percent(100), }) self._report_progress_status(s, join_nonempty( '100%%', with_fields(('total_bytes', 'of %(_total_bytes_str)s')), with_fields(('elapsed', 'in %(_elapsed_str)s')), with_fields(('speed', 'at %(_speed_str)s')), delim=' ')) if s['status'] != 
'downloading': return if update_delta := self.params.get('progress_delta'): with self._progress_delta_lock: if time.monotonic() < self._progress_delta_time: return self._progress_delta_time += update_delta s.update({ '_eta_str': self.format_eta(s.get('eta')).strip(), '_speed_str': self.format_speed(s.get('speed')), '_percent_str': self.format_percent(try_call( lambda: 100 * s['downloaded_bytes'] / s['total_bytes'], lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'], lambda: s['downloaded_bytes'] == 0 and 0)), '_total_bytes_str': _format_bytes('total_bytes'), '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'), '_downloaded_bytes_str': _format_bytes('downloaded_bytes'), '_elapsed_str': self.format_seconds(s.get('elapsed')), }) msg_template = with_fields( ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'), ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'), ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'), ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'), default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s') msg_template += with_fields( ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'), ('fragment_index', ' (frag %(fragment_index)s)')) self._report_progress_status(s, msg_template) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" self.to_screen(f'[download] Resuming download at byte {resume_len}') def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True): """Report retry""" is_frag = False if frag_index is NO_DEFAULT else 'fragment' RetryManager.report_retry( err, count, retries, info=self.__to_screen, warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'), error=IDENTITY if not fatal else lambda e: self.report_error(f'\r[download] Got error: {e}'), sleep_func=self.params.get('retry_sleep_functions', {}).get(is_frag or 'http'), suffix=f'fragment{"s" if frag_index is None else f" {frag_index}"}' if is_frag else None) def report_unable_to_resume(self): """Report it was impossible to resume download.""" self.to_screen('[download] Unable to resume') @staticmethod def supports_manifest(manifest): """ Whether the downloader can download the fragments from the manifest. Redefine in subclasses if needed. 
""" pass def download(self, filename, info_dict, subtitle=False): """Download to a filename using the info from info_dict Return True on success and False otherwise """ nooverwrites_and_exists = ( not self.params.get('overwrites', True) and os.path.exists(encodeFilename(filename)) ) if not hasattr(filename, 'write'): continuedl_and_exists = ( self.params.get('continuedl', True) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False) ) # Check file already present if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists): self.report_file_already_downloaded(filename) self._hook_progress({ 'filename': filename, 'status': 'finished', 'total_bytes': os.path.getsize(encodeFilename(filename)), }, info_dict) self._finish_multiline_status() return True, False if subtitle: sleep_interval = self.params.get('sleep_interval_subtitles') or 0 else: min_sleep_interval = self.params.get('sleep_interval') or 0 sleep_interval = random.uniform( min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval) if sleep_interval > 0: self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...') time.sleep(sleep_interval) ret = self.real_download(filename, info_dict) self._finish_multiline_status() return ret, True def real_download(self, filename, info_dict): """Real download process. Redefine in subclasses.""" raise NotImplementedError('This method must be implemented by subclasses') def _hook_progress(self, status, info_dict): # Ideally we want to make a copy of the dict, but that is too slow status['info_dict'] = info_dict # youtube-dl passes the same status object to all the hooks. # Some third party scripts seems to be relying on this. # So keep this behavior if possible for ph in self._progress_hooks: ph(status) def add_progress_hook(self, ph): # See YoutubeDl.py (search for progress_hooks) for a description of # this interface self._progress_hooks.append(ph) def _debug_cmd(self, args, exe=None): if not self.params.get('verbose', False): return str_args = [decodeArgument(a) for a in args] if exe is None: exe = os.path.basename(str_args[0]) self.write_debug(f'{exe} command line: {shell_quote(str_args)}') yt-dlp-2024.09.27/yt_dlp/downloader/dash.py000066400000000000000000000070441467563447100203020ustar00rootroot00000000000000import time import urllib.parse from . import get_suitable_downloader from .fragment import FragmentFD from ..utils import update_url_query, urljoin class DashSegmentsFD(FragmentFD): """ Download segments in a DASH manifest. 
External downloaders can take over the fragment downloads by supporting the 'dash_frag_urls' protocol """ FD_NAME = 'dashsegments' def real_download(self, filename, info_dict): if 'http_dash_segments_generator' in info_dict['protocol'].split('+'): real_downloader = None # No external FD can support --live-from-start else: if info_dict.get('is_live'): self.report_error('Live DASH videos are not supported') real_downloader = get_suitable_downloader( info_dict, self.params, None, protocol='dash_frag_urls', to_stdout=(filename == '-')) real_start = time.time() requested_formats = [{**info_dict, **fmt} for fmt in info_dict.get('requested_formats', [])] args = [] for fmt in requested_formats or [info_dict]: try: fragment_count = 1 if self.params.get('test') else len(fmt['fragments']) except TypeError: fragment_count = None ctx = { 'filename': fmt.get('filepath') or filename, 'live': 'is_from_start' if fmt.get('is_from_start') else fmt.get('is_live'), 'total_frags': fragment_count, } if real_downloader: self._prepare_external_frag_download(ctx) else: self._prepare_and_start_frag_download(ctx, fmt) ctx['start'] = real_start extra_query = None extra_param_to_segment_url = info_dict.get('extra_param_to_segment_url') if extra_param_to_segment_url: extra_query = urllib.parse.parse_qs(extra_param_to_segment_url) fragments_to_download = self._get_fragments(fmt, ctx, extra_query) if real_downloader: self.to_screen( f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') info_dict['fragments'] = list(fragments_to_download) fd = real_downloader(self.ydl, self.params) return fd.real_download(filename, info_dict) args.append([ctx, fragments_to_download, fmt]) return self.download_and_append_fragments_multiple(*args, is_fatal=lambda idx: idx == 0) def _resolve_fragments(self, fragments, ctx): fragments = fragments(ctx) if callable(fragments) else fragments return [next(iter(fragments))] if self.params.get('test') else fragments def _get_fragments(self, fmt, ctx, extra_query): fragment_base_url = fmt.get('fragment_base_url') fragments = self._resolve_fragments(fmt['fragments'], ctx) frag_index = 0 for i, fragment in enumerate(fragments): frag_index += 1 if frag_index <= ctx['fragment_index']: continue fragment_url = fragment.get('url') if not fragment_url: assert fragment_base_url fragment_url = urljoin(fragment_base_url, fragment['path']) if extra_query: fragment_url = update_url_query(fragment_url, extra_query) yield { 'frag_index': frag_index, 'fragment_count': fragment.get('fragment_count'), 'index': i, 'url': fragment_url, } yt-dlp-2024.09.27/yt_dlp/downloader/external.py000066400000000000000000000666541467563447100212210ustar00rootroot00000000000000import enum import functools import json import os import re import subprocess import sys import tempfile import time import uuid from .fragment import FragmentFD from ..networking import Request from ..postprocessor.ffmpeg import EXT_TO_OUT_FORMATS, FFmpegPostProcessor from ..utils import ( Popen, RetryManager, _configuration_args, check_executable, classproperty, cli_bool_option, cli_option, cli_valueless_option, determine_ext, encodeArgument, encodeFilename, find_available_port, remove_end, traverse_obj, ) class Features(enum.Enum): TO_STDOUT = enum.auto() MULTIPLE_FORMATS = enum.auto() class ExternalFD(FragmentFD): SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps') SUPPORTED_FEATURES = () _CAPTURE_STDERR = True def real_download(self, filename, info_dict): self.report_destination(filename) tmpfilename = 
self.temp_name(filename) self._cookies_tempfile = None try: started = time.time() retval = self._call_downloader(tmpfilename, info_dict) except KeyboardInterrupt: if not info_dict.get('is_live'): raise # Live stream downloading cancellation should be considered as # correct and expected termination thus all postprocessing # should take place retval = 0 self.to_screen(f'[{self.get_basename()}] Interrupted by user') finally: if self._cookies_tempfile: self.try_remove(self._cookies_tempfile) if retval == 0: status = { 'filename': filename, 'status': 'finished', 'elapsed': time.time() - started, } if filename != '-': fsize = os.path.getsize(encodeFilename(tmpfilename)) self.try_rename(tmpfilename, filename) status.update({ 'downloaded_bytes': fsize, 'total_bytes': fsize, }) self._hook_progress(status, info_dict) return True else: self.to_stderr('\n') self.report_error('%s exited with code %d' % ( self.get_basename(), retval)) return False @classmethod def get_basename(cls): return cls.__name__[:-2].lower() @classproperty def EXE_NAME(cls): return cls.get_basename() @functools.cached_property def exe(self): return self.EXE_NAME @classmethod def available(cls, path=None): path = check_executable( cls.EXE_NAME if path in (None, cls.get_basename()) else path, [cls.AVAILABLE_OPT]) if not path: return False cls.exe = path return path @classmethod def supports(cls, info_dict): return all(( not info_dict.get('to_stdout') or Features.TO_STDOUT in cls.SUPPORTED_FEATURES, '+' not in info_dict['protocol'] or Features.MULTIPLE_FORMATS in cls.SUPPORTED_FEATURES, not traverse_obj(info_dict, ('hls_aes', ...), 'extra_param_to_segment_url', 'extra_param_to_key_url'), all(proto in cls.SUPPORTED_PROTOCOLS for proto in info_dict['protocol'].split('+')), )) @classmethod def can_download(cls, info_dict, path=None): return cls.available(path) and cls.supports(info_dict) def _option(self, command_option, param): return cli_option(self.params, command_option, param) def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None): return cli_bool_option(self.params, command_option, param, true_value, false_value, separator) def _valueless_option(self, command_option, param, expected_value=True): return cli_valueless_option(self.params, command_option, param, expected_value) def _configuration_args(self, keys=None, *args, **kwargs): return _configuration_args( self.get_basename(), self.params.get('external_downloader_args'), self.EXE_NAME, keys, *args, **kwargs) def _write_cookies(self): if not self.ydl.cookiejar.filename: tmp_cookies = tempfile.NamedTemporaryFile(suffix='.cookies', delete=False) tmp_cookies.close() self._cookies_tempfile = tmp_cookies.name self.to_screen(f'[download] Writing temporary cookies file to "{self._cookies_tempfile}"') # real_download resets _cookies_tempfile; if it's None then save() will write to cookiejar.filename self.ydl.cookiejar.save(self._cookies_tempfile) return self.ydl.cookiejar.filename or self._cookies_tempfile def _call_downloader(self, tmpfilename, info_dict): """ Either overwrite this or implement _make_cmd """ cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)] self._debug_cmd(cmd) if 'fragments' not in info_dict: _, stderr, returncode = self._call_process(cmd, info_dict) if returncode and stderr: self.to_stderr(stderr) return returncode skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=None, 
fatal=not skip_unavailable_fragments) for retry in retry_manager: _, stderr, returncode = self._call_process(cmd, info_dict) if not returncode: break # TODO: Decide whether to retry based on error code # https://aria2.github.io/manual/en/html/aria2c.html#exit-status if stderr: self.to_stderr(stderr) retry.error = Exception() continue if not skip_unavailable_fragments and retry_manager.error: return -1 decrypt_fragment = self.decrypter(info_dict) dest, _ = self.sanitize_open(tmpfilename, 'wb') for frag_index, fragment in enumerate(info_dict['fragments']): fragment_filename = f'{tmpfilename}-Frag{frag_index}' try: src, _ = self.sanitize_open(fragment_filename, 'rb') except OSError as err: if skip_unavailable_fragments and frag_index > 1: self.report_skip_fragment(frag_index, err) continue self.report_error(f'Unable to open fragment {frag_index}; {err}') return -1 dest.write(decrypt_fragment(fragment, src.read())) src.close() if not self.params.get('keep_fragments', False): self.try_remove(encodeFilename(fragment_filename)) dest.close() self.try_remove(encodeFilename(f'{tmpfilename}.frag.urls')) return 0 def _call_process(self, cmd, info_dict): return Popen.run(cmd, text=True, stderr=subprocess.PIPE if self._CAPTURE_STDERR else None) class CurlFD(ExternalFD): AVAILABLE_OPT = '-V' _CAPTURE_STDERR = False # curl writes the progress to stderr def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '--location', '-o', tmpfilename, '--compressed'] cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url']) if cookie_header: cmd += ['--cookie', cookie_header] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', f'{key}: {val}'] cmd += self._bool_option('--continue-at', 'continuedl', '-', '0') cmd += self._valueless_option('--silent', 'noprogress') cmd += self._valueless_option('--verbose', 'verbose') cmd += self._option('--limit-rate', 'ratelimit') retry = self._option('--retry', 'retries') if len(retry) == 2: if retry[1] in ('inf', 'infinite'): retry[1] = '2147483647' cmd += retry cmd += self._option('--max-filesize', 'max_filesize') cmd += self._option('--interface', 'source_address') cmd += self._option('--proxy', 'proxy') cmd += self._valueless_option('--insecure', 'nocheckcertificate') cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd class AxelFD(ExternalFD): AVAILABLE_OPT = '-V' def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-o', tmpfilename] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['-H', f'{key}: {val}'] cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url']) if cookie_header: cmd += ['-H', f'Cookie: {cookie_header}', '--max-redirect=0'] cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd class WgetFD(ExternalFD): AVAILABLE_OPT = '--version' def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-O', tmpfilename, '-nv', '--compression=auto'] if self.ydl.cookiejar.get_cookie_header(info_dict['url']): cmd += ['--load-cookies', self._write_cookies()] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', f'{key}: {val}'] cmd += self._option('--limit-rate', 'ratelimit') retry = self._option('--tries', 'retries') if len(retry) == 2: if retry[1] in ('inf', 'infinite'): retry[1] = '0' cmd += retry cmd += self._option('--bind-address', 'source_address') proxy = self.params.get('proxy') if proxy: for var in 
('http_proxy', 'https_proxy'): cmd += ['--execute', f'{var}={proxy}'] cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate') cmd += self._configuration_args() cmd += ['--', info_dict['url']] return cmd class Aria2cFD(ExternalFD): AVAILABLE_OPT = '-v' SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'dash_frag_urls', 'm3u8_frag_urls') @staticmethod def supports_manifest(manifest): UNSUPPORTED_FEATURES = [ r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [1] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 ] check_results = (not re.search(feature, manifest) for feature in UNSUPPORTED_FEATURES) return all(check_results) @staticmethod def _aria2c_filename(fn): return fn if os.path.isabs(fn) else f'.{os.path.sep}{fn}' def _call_downloader(self, tmpfilename, info_dict): # FIXME: Disabled due to https://github.com/yt-dlp/yt-dlp/issues/5931 if False and 'no-external-downloader-progress' not in self.params.get('compat_opts', []): info_dict['__rpc'] = { 'port': find_available_port() or 19190, 'secret': str(uuid.uuid4()), } return super()._call_downloader(tmpfilename, info_dict) def _make_cmd(self, tmpfilename, info_dict): cmd = [self.exe, '-c', '--no-conf', '--console-log-level=warn', '--summary-interval=0', '--download-result=hide', '--http-accept-gzip=true', '--file-allocation=none', '-x16', '-j16', '-s16'] if 'fragments' in info_dict: cmd += ['--allow-overwrite=true', '--allow-piece-length-change=true'] else: cmd += ['--min-split-size', '1M'] if self.ydl.cookiejar.get_cookie_header(info_dict['url']): cmd += [f'--load-cookies={self._write_cookies()}'] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += ['--header', f'{key}: {val}'] cmd += self._option('--max-overall-download-limit', 'ratelimit') cmd += self._option('--interface', 'source_address') cmd += self._option('--all-proxy', 'proxy') cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=') cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=') cmd += self._bool_option('--show-console-readout', 'noprogress', 'false', 'true', '=') cmd += self._configuration_args() if '__rpc' in info_dict: cmd += [ '--enable-rpc', f'--rpc-listen-port={info_dict["__rpc"]["port"]}', f'--rpc-secret={info_dict["__rpc"]["secret"]}'] # aria2c strips out spaces from the beginning/end of filenames and paths. # We work around this issue by adding a "./" to the beginning of the # filename and relative path, and adding a "/" at the end of the path. 
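        # (Illustrative example, not from the upstream sources: a fragment
        # file named ' my frag' would come back from aria2c as 'my frag',
        # with the leading space stripped, whereas the './ my frag' form
        # produced by _aria2c_filename() round-trips unchanged.)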
# See: https://github.com/yt-dlp/yt-dlp/issues/276 # https://github.com/ytdl-org/youtube-dl/issues/20312 # https://github.com/aria2/aria2/issues/1373 dn = os.path.dirname(tmpfilename) if dn: cmd += ['--dir', self._aria2c_filename(dn) + os.path.sep] if 'fragments' not in info_dict: cmd += ['--out', self._aria2c_filename(os.path.basename(tmpfilename))] cmd += ['--auto-file-renaming=false'] if 'fragments' in info_dict: cmd += ['--uri-selector=inorder'] url_list_file = f'{tmpfilename}.frag.urls' url_list = [] for frag_index, fragment in enumerate(info_dict['fragments']): fragment_filename = f'{os.path.basename(tmpfilename)}-Frag{frag_index}' url_list.append('{}\n\tout={}'.format(fragment['url'], self._aria2c_filename(fragment_filename))) stream, _ = self.sanitize_open(url_list_file, 'wb') stream.write('\n'.join(url_list).encode()) stream.close() cmd += ['-i', self._aria2c_filename(url_list_file)] else: cmd += ['--', info_dict['url']] return cmd def aria2c_rpc(self, rpc_port, rpc_secret, method, params=()): # Does not actually need to be UUID, just unique sanitycheck = str(uuid.uuid4()) d = json.dumps({ 'jsonrpc': '2.0', 'id': sanitycheck, 'method': method, 'params': [f'token:{rpc_secret}', *params], }).encode() request = Request( f'http://localhost:{rpc_port}/jsonrpc', data=d, headers={ 'Content-Type': 'application/json', 'Content-Length': f'{len(d)}', }, proxies={'all': None}) with self.ydl.urlopen(request) as r: resp = json.load(r) assert resp.get('id') == sanitycheck, 'Something went wrong with RPC server' return resp['result'] def _call_process(self, cmd, info_dict): if '__rpc' not in info_dict: return super()._call_process(cmd, info_dict) send_rpc = functools.partial(self.aria2c_rpc, info_dict['__rpc']['port'], info_dict['__rpc']['secret']) started = time.time() fragmented = 'fragments' in info_dict frag_count = len(info_dict['fragments']) if fragmented else 1 status = { 'filename': info_dict.get('_filename'), 'status': 'downloading', 'elapsed': 0, 'downloaded_bytes': 0, 'fragment_count': frag_count if fragmented else None, 'fragment_index': 0 if fragmented else None, } self._hook_progress(status, info_dict) def get_stat(key, *obj, average=False): val = tuple(filter(None, map(float, traverse_obj(obj, (..., ..., key))))) or [0] return sum(val) / (len(val) if average else 1) with Popen(cmd, text=True, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE) as p: # Add a small sleep so that RPC client can receive response, # or the connection stalls infinitely time.sleep(0.2) retval = p.poll() while retval is None: # We don't use tellStatus as we won't know the GID without reading stdout # Ref: https://aria2.github.io/manual/en/html/aria2c.html#aria2.tellActive active = send_rpc('aria2.tellActive') completed = send_rpc('aria2.tellStopped', [0, frag_count]) downloaded = get_stat('totalLength', completed) + get_stat('completedLength', active) speed = get_stat('downloadSpeed', active) total = frag_count * get_stat('totalLength', active, completed, average=True) if total < downloaded: total = None status.update({ 'downloaded_bytes': int(downloaded), 'speed': speed, 'total_bytes': None if fragmented else total, 'total_bytes_estimate': total, 'eta': (total - downloaded) / (speed or 1), 'fragment_index': min(frag_count, len(completed) + 1) if fragmented else None, 'elapsed': time.time() - started, }) self._hook_progress(status, info_dict) if not active and len(completed) >= frag_count: send_rpc('aria2.shutdown') retval = p.wait() break time.sleep(0.1) retval = p.poll() return '', p.stderr.read(), retval 
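# A minimal sketch (not part of yt-dlp) of the JSON-RPC exchange that
# aria2c_rpc()/_call_process() above drive. It assumes an aria2c instance
# started as `aria2c --enable-rpc --rpc-listen-port=19190 --rpc-secret=SECRET`;
# the port, secret, and request id are illustrative values, not defaults of
# this module:
#
#   import json
#   import urllib.request
#
#   payload = json.dumps({
#       'jsonrpc': '2.0',
#       'id': 'probe',                 # aria2c echoes this back in the response
#       'method': 'aria2.tellActive',  # list the downloads currently in progress
#       'params': ['token:SECRET'],    # the secret token must be the first param
#   }).encode()
#   request = urllib.request.Request(
#       'http://localhost:19190/jsonrpc', data=payload,
#       headers={'Content-Type': 'application/json'})
#   with urllib.request.urlopen(request) as response:
#       print(json.load(response)['result'])  # one status dict per active download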
class HttpieFD(ExternalFD): AVAILABLE_OPT = '--version' EXE_NAME = 'http' def _make_cmd(self, tmpfilename, info_dict): cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']] if info_dict.get('http_headers') is not None: for key, val in info_dict['http_headers'].items(): cmd += [f'{key}:{val}'] # httpie 3.1.0+ removes the Cookie header on redirect, so this should be safe for now. [1] # If we ever need cookie handling for redirects, we can export the cookiejar into a session. [2] # 1: https://github.com/httpie/httpie/security/advisories/GHSA-9w4w-cpc8-h2fq # 2: https://httpie.io/docs/cli/sessions cookie_header = self.ydl.cookiejar.get_cookie_header(info_dict['url']) if cookie_header: cmd += [f'Cookie:{cookie_header}'] return cmd class FFmpegFD(ExternalFD): SUPPORTED_PROTOCOLS = ('http', 'https', 'ftp', 'ftps', 'm3u8', 'm3u8_native', 'rtsp', 'rtmp', 'rtmp_ffmpeg', 'mms', 'http_dash_segments') SUPPORTED_FEATURES = (Features.TO_STDOUT, Features.MULTIPLE_FORMATS) @classmethod def available(cls, path=None): # TODO: Fix path for ffmpeg # Fixme: This may be wrong when --ffmpeg-location is used return FFmpegPostProcessor().available def on_process_started(self, proc, stdin): """ Override this in subclasses """ pass @classmethod def can_merge_formats(cls, info_dict, params): return ( info_dict.get('requested_formats') and info_dict.get('protocol') and not params.get('allow_unplayable_formats') and 'no-direct-merge' not in params.get('compat_opts', []) and cls.can_download(info_dict)) def _call_downloader(self, tmpfilename, info_dict): ffpp = FFmpegPostProcessor(downloader=self) if not ffpp.available: self.report_error('m3u8 download detected but ffmpeg could not be found. Please install') return False ffpp.check_version() args = [ffpp.executable, '-y'] for log_level in ('quiet', 'verbose'): if self.params.get(log_level, False): args += ['-loglevel', log_level] break if not self.params.get('verbose'): args += ['-hide_banner'] args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args', ...)) # These exists only for compatibility. Extractors should use # info_dict['downloader_options']['ffmpeg_args'] instead args += info_dict.get('_ffmpeg_args') or [] seekable = info_dict.get('_seekable') if seekable is not None: # setting -seekable prevents ffmpeg from guessing if the server # supports seeking(by adding the header `Range: bytes=0-`), which # can cause problems in some cases # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127 # http://trac.ffmpeg.org/ticket/6125#comment:10 args += ['-seekable', '1' if seekable else '0'] env = None proxy = self.params.get('proxy') if proxy: if not re.match(r'[\da-zA-Z]+://', proxy): proxy = f'http://{proxy}' if proxy.startswith('socks'): self.report_warning( f'{self.get_basename()} does not support SOCKS proxies. Downloading is likely to fail. 
' 'Consider adding --hls-prefer-native to your command.') # Since December 2015 ffmpeg supports -http_proxy option (see # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd) # We could switch to the following code if we are able to detect version properly # args += ['-http_proxy', proxy] env = os.environ.copy() env['HTTP_PROXY'] = proxy env['http_proxy'] = proxy protocol = info_dict.get('protocol') if protocol == 'rtmp': player_url = info_dict.get('player_url') page_url = info_dict.get('page_url') app = info_dict.get('app') play_path = info_dict.get('play_path') tc_url = info_dict.get('tc_url') flash_version = info_dict.get('flash_version') live = info_dict.get('rtmp_live', False) conn = info_dict.get('rtmp_conn') if player_url is not None: args += ['-rtmp_swfverify', player_url] if page_url is not None: args += ['-rtmp_pageurl', page_url] if app is not None: args += ['-rtmp_app', app] if play_path is not None: args += ['-rtmp_playpath', play_path] if tc_url is not None: args += ['-rtmp_tcurl', tc_url] if flash_version is not None: args += ['-rtmp_flashver', flash_version] if live: args += ['-rtmp_live', 'live'] if isinstance(conn, list): for entry in conn: args += ['-rtmp_conn', entry] elif isinstance(conn, str): args += ['-rtmp_conn', conn] start_time, end_time = info_dict.get('section_start') or 0, info_dict.get('section_end') selected_formats = info_dict.get('requested_formats') or [info_dict] for i, fmt in enumerate(selected_formats): is_http = re.match(r'https?://', fmt['url']) cookies = self.ydl.cookiejar.get_cookies_for_url(fmt['url']) if is_http else [] if cookies: args.extend(['-cookies', ''.join( f'{cookie.name}={cookie.value}; path={cookie.path}; domain={cookie.domain};\r\n' for cookie in cookies)]) if fmt.get('http_headers') and is_http: # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv: # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header. args.extend(['-headers', ''.join(f'{key}: {val}\r\n' for key, val in fmt['http_headers'].items())]) if start_time: args += ['-ss', str(start_time)] if end_time: args += ['-t', str(end_time - start_time)] args += [*self._configuration_args((f'_i{i + 1}', '_i')), '-i', fmt['url']] if not (start_time or end_time) or not self.params.get('force_keyframes_at_cuts'): args += ['-c', 'copy'] if info_dict.get('requested_formats') or protocol == 'http_dash_segments': for i, fmt in enumerate(selected_formats): stream_number = fmt.get('manifest_stream_number', 0) args.extend(['-map', f'{i}:{stream_number}']) if self.params.get('test', False): args += ['-fs', str(self._TEST_FILE_SIZE)] ext = info_dict['ext'] if protocol in ('m3u8', 'm3u8_native'): use_mpegts = (tmpfilename == '-') or self.params.get('hls_use_mpegts') if use_mpegts is None: use_mpegts = info_dict.get('is_live') if use_mpegts: args += ['-f', 'mpegts'] else: args += ['-f', 'mp4'] if (ffpp.basename == 'ffmpeg' and ffpp._features.get('needs_adtstoasc')) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')): args += ['-bsf:a', 'aac_adtstoasc'] elif protocol == 'rtmp': args += ['-f', 'flv'] elif ext == 'mp4' and tmpfilename == '-': args += ['-f', 'mpegts'] elif ext == 'unknown_video': ext = determine_ext(remove_end(tmpfilename, '.part')) if ext == 'unknown_video': self.report_warning( 'The video format is unknown and cannot be downloaded by ffmpeg. 
' 'Explicitly set the extension in the filename to attempt download in that format') else: self.report_warning(f'The video format is unknown. Trying to download as {ext} according to the filename') args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)] else: args += ['-f', EXT_TO_OUT_FORMATS.get(ext, ext)] args += traverse_obj(info_dict, ('downloader_options', 'ffmpeg_args_out', ...)) args += self._configuration_args(('_o1', '_o', '')) args = [encodeArgument(opt) for opt in args] args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True)) self._debug_cmd(args) piped = any(fmt['url'] in ('-', 'pipe:') for fmt in selected_formats) with Popen(args, stdin=subprocess.PIPE, env=env) as proc: if piped: self.on_process_started(proc, proc.stdin) try: retval = proc.wait() except BaseException as e: # subprocces.run would send the SIGKILL signal to ffmpeg and the # mp4 file couldn't be played, but if we ask ffmpeg to quit it # produces a file that is playable (this is mostly useful for live # streams). Note that Windows is not affected and produces playable # files (see https://github.com/ytdl-org/youtube-dl/issues/8300). if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32' and not piped: proc.communicate_or_kill(b'q') else: proc.kill(timeout=None) raise return retval class AVconvFD(FFmpegFD): pass _BY_NAME = { klass.get_basename(): klass for name, klass in globals().items() if name.endswith('FD') and name not in ('ExternalFD', 'FragmentFD') } def list_external_downloaders(): return sorted(_BY_NAME.keys()) def get_external_downloader(external_downloader): """ Given the name of the executable, see whether we support the given downloader """ bn = os.path.splitext(os.path.basename(external_downloader))[0] return _BY_NAME.get(bn) or next(( klass for klass in _BY_NAME.values() if klass.EXE_NAME in bn ), None) yt-dlp-2024.09.27/yt_dlp/downloader/f4m.py000066400000000000000000000357531467563447100200610ustar00rootroot00000000000000import base64 import io import itertools import struct import time import urllib.parse from .fragment import FragmentFD from ..compat import compat_etree_fromstring from ..networking.exceptions import HTTPError from ..utils import fix_xml_ampersands, xpath_text class DataTruncatedError(Exception): pass class FlvReader(io.BytesIO): """ Reader for Flv files The file format is documented in https://www.adobe.com/devnet/f4v.html """ def read_bytes(self, n): data = self.read(n) if len(data) < n: raise DataTruncatedError( 'FlvReader error: need %d bytes while only %d bytes got' % ( n, len(data))) return data # Utility functions for reading numbers and strings def read_unsigned_long_long(self): return struct.unpack('!Q', self.read_bytes(8))[0] def read_unsigned_int(self): return struct.unpack('!I', self.read_bytes(4))[0] def read_unsigned_char(self): return struct.unpack('!B', self.read_bytes(1))[0] def read_string(self): res = b'' while True: char = self.read_bytes(1) if char == b'\x00': break res += char return res def read_box_info(self): """ Read a box and return the info as a tuple: (box_size, box_type, box_data) """ real_size = size = self.read_unsigned_int() box_type = self.read_bytes(4) header_end = 8 if size == 1: real_size = self.read_unsigned_long_long() header_end = 16 return real_size, box_type, self.read_bytes(real_size - header_end) def read_asrt(self): # version self.read_unsigned_char() # flags self.read_bytes(3) quality_entry_count = self.read_unsigned_char() # QualityEntryCount for _ in range(quality_entry_count): self.read_string() 
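        # What remains of the 'asrt' box is the segment run table itself: a
        # uint32 entry count followed by one (FirstSegment, FragmentsPerSegment)
        # pair of uint32s per entry, as read below. For example, the bytes
        # b'\x00\x00\x00\x01' (count) + b'\x00\x00\x00\x01\xff\xff\xff\xff'
        # parse to {'segment_run': [(1, 4294967295)]} -- 4294967295 being the
        # live-stream sentinel handled in build_fragments_list() further down.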
segment_run_count = self.read_unsigned_int() segments = [] for _ in range(segment_run_count): first_segment = self.read_unsigned_int() fragments_per_segment = self.read_unsigned_int() segments.append((first_segment, fragments_per_segment)) return { 'segment_run': segments, } def read_afrt(self): # version self.read_unsigned_char() # flags self.read_bytes(3) # time scale self.read_unsigned_int() quality_entry_count = self.read_unsigned_char() # QualitySegmentUrlModifiers for _ in range(quality_entry_count): self.read_string() fragments_count = self.read_unsigned_int() fragments = [] for _ in range(fragments_count): first = self.read_unsigned_int() first_ts = self.read_unsigned_long_long() duration = self.read_unsigned_int() if duration == 0: discontinuity_indicator = self.read_unsigned_char() else: discontinuity_indicator = None fragments.append({ 'first': first, 'ts': first_ts, 'duration': duration, 'discontinuity_indicator': discontinuity_indicator, }) return { 'fragments': fragments, } def read_abst(self): # version self.read_unsigned_char() # flags self.read_bytes(3) self.read_unsigned_int() # BootstrapinfoVersion # Profile,Live,Update,Reserved flags = self.read_unsigned_char() live = flags & 0x20 != 0 # time scale self.read_unsigned_int() # CurrentMediaTime self.read_unsigned_long_long() # SmpteTimeCodeOffset self.read_unsigned_long_long() self.read_string() # MovieIdentifier server_count = self.read_unsigned_char() # ServerEntryTable for _ in range(server_count): self.read_string() quality_count = self.read_unsigned_char() # QualityEntryTable for _ in range(quality_count): self.read_string() # DrmData self.read_string() # MetaData self.read_string() segments_count = self.read_unsigned_char() segments = [] for _ in range(segments_count): box_size, box_type, box_data = self.read_box_info() assert box_type == b'asrt' segment = FlvReader(box_data).read_asrt() segments.append(segment) fragments_run_count = self.read_unsigned_char() fragments = [] for _ in range(fragments_run_count): box_size, box_type, box_data = self.read_box_info() assert box_type == b'afrt' fragments.append(FlvReader(box_data).read_afrt()) return { 'segments': segments, 'fragments': fragments, 'live': live, } def read_bootstrap_info(self): total_size, box_type, box_data = self.read_box_info() assert box_type == b'abst' return FlvReader(box_data).read_abst() def read_bootstrap_info(bootstrap_bytes): return FlvReader(bootstrap_bytes).read_bootstrap_info() def build_fragments_list(boot_info): """ Return a list of (segment, fragment) for each fragment in the video """ res = [] segment_run_table = boot_info['segments'][0] fragment_run_entry_table = boot_info['fragments'][0]['fragments'] first_frag_number = fragment_run_entry_table[0]['first'] fragments_counter = itertools.count(first_frag_number) for segment, fragments_count in segment_run_table['segment_run']: # In some live HDS streams (e.g. Rai), `fragments_count` is # abnormal and causing out-of-memory errors. 
It's OK to change the # number of fragments for live streams as they are updated periodically if fragments_count == 4294967295 and boot_info['live']: fragments_count = 2 for _ in range(fragments_count): res.append((segment, next(fragments_counter))) if boot_info['live']: res = res[-2:] return res def write_unsigned_int(stream, val): stream.write(struct.pack('!I', val)) def write_unsigned_int_24(stream, val): stream.write(struct.pack('!I', val)[1:]) def write_flv_header(stream): """Writes the FLV header to stream""" # FLV header stream.write(b'FLV\x01') stream.write(b'\x05') stream.write(b'\x00\x00\x00\x09') stream.write(b'\x00\x00\x00\x00') def write_metadata_tag(stream, metadata): """Writes optional metadata tag to stream""" SCRIPT_TAG = b'\x12' FLV_TAG_HEADER_LEN = 11 if metadata: stream.write(SCRIPT_TAG) write_unsigned_int_24(stream, len(metadata)) stream.write(b'\x00\x00\x00\x00\x00\x00\x00') stream.write(metadata) write_unsigned_int(stream, FLV_TAG_HEADER_LEN + len(metadata)) def remove_encrypted_media(media): return list(filter(lambda e: 'drmAdditionalHeaderId' not in e.attrib and 'drmAdditionalHeaderSetId' not in e.attrib, media)) def _add_ns(prop, ver=1): return '{http://ns.adobe.com/f4m/%d.0}%s' % (ver, prop) def get_base_url(manifest): base_url = xpath_text( manifest, [_add_ns('baseURL'), _add_ns('baseURL', 2)], 'base URL', default=None) if base_url: base_url = base_url.strip() return base_url class F4mFD(FragmentFD): """ A downloader for f4m manifests or AdobeHDS. """ def _get_unencrypted_media(self, doc): media = doc.findall(_add_ns('media')) if not media: self.report_error('No media found') if not self.params.get('allow_unplayable_formats'): for e in (doc.findall(_add_ns('drmAdditionalHeader')) + doc.findall(_add_ns('drmAdditionalHeaderSet'))): # If id attribute is missing it's valid for all media nodes # without drmAdditionalHeaderId or drmAdditionalHeaderSetId attribute if 'id' not in e.attrib: self.report_error('Missing ID in f4m DRM') media = remove_encrypted_media(media) if not media: self.report_error('Unsupported DRM') return media def _get_bootstrap_from_url(self, bootstrap_url): bootstrap = self.ydl.urlopen(bootstrap_url).read() return read_bootstrap_info(bootstrap) def _update_live_fragments(self, bootstrap_url, latest_fragment): fragments_list = [] retries = 30 while (not fragments_list) and (retries > 0): boot_info = self._get_bootstrap_from_url(bootstrap_url) fragments_list = build_fragments_list(boot_info) fragments_list = [f for f in fragments_list if f[1] > latest_fragment] if not fragments_list: # Retry after a while time.sleep(5.0) retries -= 1 if not fragments_list: self.report_error('Failed to update fragments') return fragments_list def _parse_bootstrap_node(self, node, base_url): # Sometimes non empty inline bootstrap info can be specified along # with bootstrap url attribute (e.g. dummy inline bootstrap info # contains whitespace characters in [1]). We will prefer bootstrap # url over inline bootstrap info when present. # 1. 
http://live-1-1.rutube.ru/stream/1024/HDS/SD/C2NKsS85HQNckgn5HdEmOQ/1454167650/S-s604419906/move/four/dirs/upper/1024-576p.f4m bootstrap_url = node.get('url') if bootstrap_url: bootstrap_url = urllib.parse.urljoin( base_url, bootstrap_url) boot_info = self._get_bootstrap_from_url(bootstrap_url) else: bootstrap_url = None bootstrap = base64.b64decode(node.text) boot_info = read_bootstrap_info(bootstrap) return boot_info, bootstrap_url def real_download(self, filename, info_dict): man_url = info_dict['url'] requested_bitrate = info_dict.get('tbr') self.to_screen(f'[{self.FD_NAME}] Downloading f4m manifest') urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.url # Some manifests may be malformed, e.g. prosiebensat1 generated manifests # (see https://github.com/ytdl-org/youtube-dl/issues/6215#issuecomment-121704244 # and https://github.com/ytdl-org/youtube-dl/issues/7823) manifest = fix_xml_ampersands(urlh.read().decode('utf-8', 'ignore')).strip() doc = compat_etree_fromstring(manifest) formats = [(int(f.attrib.get('bitrate', -1)), f) for f in self._get_unencrypted_media(doc)] if requested_bitrate is None or len(formats) == 1: # get the best format formats = sorted(formats, key=lambda f: f[0]) rate, media = formats[-1] else: rate, media = next(filter( lambda f: int(f[0]) == requested_bitrate, formats)) # Prefer baseURL for relative URLs as per 11.2 of F4M 3.0 spec. man_base_url = get_base_url(doc) or man_url base_url = urllib.parse.urljoin(man_base_url, media.attrib['url']) bootstrap_node = doc.find(_add_ns('bootstrapInfo')) boot_info, bootstrap_url = self._parse_bootstrap_node( bootstrap_node, man_base_url) live = boot_info['live'] metadata_node = media.find(_add_ns('metadata')) if metadata_node is not None: metadata = base64.b64decode(metadata_node.text) else: metadata = None fragments_list = build_fragments_list(boot_info) test = self.params.get('test', False) if test: # We only download the first fragment fragments_list = fragments_list[:1] total_frags = len(fragments_list) # For some akamai manifests we'll need to add a query to the fragment url akamai_pv = xpath_text(doc, _add_ns('pv-2.0')) ctx = { 'filename': filename, 'total_frags': total_frags, 'live': bool(live), } self._prepare_frag_download(ctx) dest_stream = ctx['dest_stream'] if ctx['complete_frags_downloaded_bytes'] == 0: write_flv_header(dest_stream) if not live: write_metadata_tag(dest_stream, metadata) base_url_parsed = urllib.parse.urlparse(base_url) self._start_frag_download(ctx, info_dict) frag_index = 0 while fragments_list: seg_i, frag_i = fragments_list.pop(0) frag_index += 1 if frag_index <= ctx['fragment_index']: continue name = 'Seg%d-Frag%d' % (seg_i, frag_i) query = [] if base_url_parsed.query: query.append(base_url_parsed.query) if akamai_pv: query.append(akamai_pv.strip(';')) if info_dict.get('extra_param_to_segment_url'): query.append(info_dict['extra_param_to_segment_url']) url_parsed = base_url_parsed._replace(path=base_url_parsed.path + name, query='&'.join(query)) try: success = self._download_fragment(ctx, url_parsed.geturl(), info_dict) if not success: return False down_data = self._read_fragment(ctx) reader = FlvReader(down_data) while True: try: _, box_type, box_data = reader.read_box_info() except DataTruncatedError: if test: # In tests, segments may be truncated, and thus # FlvReader may not be able to parse the whole # chunk. 
If so, write the segment as is # See https://github.com/ytdl-org/youtube-dl/issues/9214 dest_stream.write(down_data) break raise if box_type == b'mdat': self._append_fragment(ctx, box_data) break except HTTPError as err: if live and (err.status == 404 or err.status == 410): # We didn't keep up with the live window. Continue # with the next available fragment. msg = 'Fragment %d unavailable' % frag_i self.report_warning(msg) fragments_list = [] else: raise if not fragments_list and not test and live and bootstrap_url: fragments_list = self._update_live_fragments(bootstrap_url, frag_i) total_frags += len(fragments_list) if fragments_list and (fragments_list[0][1] > frag_i + 1): msg = 'Missed %d fragments' % (fragments_list[0][1] - (frag_i + 1)) self.report_warning(msg) return self._finish_frag_download(ctx, info_dict) yt-dlp-2024.09.27/yt_dlp/downloader/fc2.py000066400000000000000000000024541467563447100200350ustar00rootroot00000000000000import threading from .common import FileDownloader from .external import FFmpegFD class FC2LiveFD(FileDownloader): """ Downloads FC2 live without being stopped.
Note, this is not a part of public API, and will be removed without notice. DO NOT USE """ def real_download(self, filename, info_dict): ws = info_dict['ws'] heartbeat_lock = threading.Lock() heartbeat_state = [None, 1] def heartbeat(): if heartbeat_state[1] < 0: return try: heartbeat_state[1] += 1 ws.send('{"name":"heartbeat","arguments":{},"id":%d}' % heartbeat_state[1]) except Exception: self.to_screen('[fc2:live] Heartbeat failed') with heartbeat_lock: heartbeat_state[0] = threading.Timer(30, heartbeat) heartbeat_state[0]._daemonic = True heartbeat_state[0].start() heartbeat() new_info_dict = info_dict.copy() new_info_dict.update({ 'ws': None, 'protocol': 'live_ffmpeg', }) try: return FFmpegFD(self.ydl, self.params or {}).download(filename, new_info_dict) finally: # stop heartbeating heartbeat_state[1] = -1 yt-dlp-2024.09.27/yt_dlp/downloader/fragment.py000066400000000000000000000524371467563447100211740ustar00rootroot00000000000000import concurrent.futures import contextlib import json import math import os import struct import time from .common import FileDownloader from .http import HttpFD from ..aes import aes_cbc_decrypt_bytes, unpad_pkcs7 from ..compat import compat_os_name from ..networking import Request from ..networking.exceptions import HTTPError, IncompleteRead from ..utils import DownloadError, RetryManager, encodeFilename, traverse_obj from ..utils.networking import HTTPHeaderDict from ..utils.progress import ProgressCalculator class HttpQuietDownloader(HttpFD): def to_screen(self, *args, **kargs): pass to_console_title = to_screen class FragmentFD(FileDownloader): """ A base file downloader class for fragmented media (e.g. f4m/m3u8 manifests). Available options: fragment_retries: Number of times to retry a fragment for HTTP error (DASH and hlsnative only). Default is 0 for API, but 10 for CLI skip_unavailable_fragments: Skip unavailable fragments (DASH and hlsnative only) keep_fragments: Keep downloaded fragments on disk after downloading is finished concurrent_fragment_downloads: The number of threads to use for native hls and dash downloads _no_ytdl_file: Don't use .ytdl file For each incomplete fragment download yt-dlp keeps on disk a special bookkeeping file with download state and metadata (in future such files will be used for any incomplete download handled by yt-dlp). This file is used to properly handle resuming, check download file consistency and detect potential errors. The file has a .ytdl extension and represents a standard JSON file of the following format: extractor: Dictionary of extractor related data. TBD. downloader: Dictionary of downloader related data. May contain following data: current_fragment: Dictionary with current (being downloaded) fragment data: index: 0-based index of current fragment among all fragments fragment_count: Total count of fragments This feature is experimental and file format may change in future. """ def report_retry_fragment(self, err, frag_index, count, retries): self.deprecation_warning('yt_dlp.downloader.FragmentFD.report_retry_fragment is deprecated. 
' 'Use yt_dlp.downloader.FileDownloader.report_retry instead') return self.report_retry(err, count, retries, frag_index) def report_skip_fragment(self, frag_index, err=None): err = f' {err};' if err else '' self.to_screen(f'[download]{err} Skipping fragment {frag_index:d} ...') def _prepare_url(self, info_dict, url): headers = info_dict.get('http_headers') return Request(url, None, headers) if headers else url def _prepare_and_start_frag_download(self, ctx, info_dict): self._prepare_frag_download(ctx) self._start_frag_download(ctx, info_dict) def __do_ytdl_file(self, ctx): return ctx['live'] is not True and ctx['tmpfilename'] != '-' and not self.params.get('_no_ytdl_file') def _read_ytdl_file(self, ctx): assert 'ytdl_corrupt' not in ctx stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'r') try: ytdl_data = json.loads(stream.read()) ctx['fragment_index'] = ytdl_data['downloader']['current_fragment']['index'] if 'extra_state' in ytdl_data['downloader']: ctx['extra_state'] = ytdl_data['downloader']['extra_state'] except Exception: ctx['ytdl_corrupt'] = True finally: stream.close() def _write_ytdl_file(self, ctx): frag_index_stream, _ = self.sanitize_open(self.ytdl_filename(ctx['filename']), 'w') try: downloader = { 'current_fragment': { 'index': ctx['fragment_index'], }, } if 'extra_state' in ctx: downloader['extra_state'] = ctx['extra_state'] if ctx.get('fragment_count') is not None: downloader['fragment_count'] = ctx['fragment_count'] frag_index_stream.write(json.dumps({'downloader': downloader})) finally: frag_index_stream.close() def _download_fragment(self, ctx, frag_url, info_dict, headers=None, request_data=None): fragment_filename = '%s-Frag%d' % (ctx['tmpfilename'], ctx['fragment_index']) fragment_info_dict = { 'url': frag_url, 'http_headers': headers or info_dict.get('http_headers'), 'request_data': request_data, 'ctx_id': ctx.get('ctx_id'), } frag_resume_len = 0 if ctx['dl'].params.get('continuedl', True): frag_resume_len = self.filesize_or_none(self.temp_name(fragment_filename)) fragment_info_dict['frag_resume_len'] = ctx['frag_resume_len'] = frag_resume_len success, _ = ctx['dl'].download(fragment_filename, fragment_info_dict) if not success: return False if fragment_info_dict.get('filetime'): ctx['fragment_filetime'] = fragment_info_dict.get('filetime') ctx['fragment_filename_sanitized'] = fragment_filename return True def _read_fragment(self, ctx): if not ctx.get('fragment_filename_sanitized'): return None try: down, frag_sanitized = self.sanitize_open(ctx['fragment_filename_sanitized'], 'rb') except FileNotFoundError: if ctx.get('live'): return None raise ctx['fragment_filename_sanitized'] = frag_sanitized frag_content = down.read() down.close() return frag_content def _append_fragment(self, ctx, frag_content): try: ctx['dest_stream'].write(frag_content) ctx['dest_stream'].flush() finally: if self.__do_ytdl_file(ctx): self._write_ytdl_file(ctx) if not self.params.get('keep_fragments', False): self.try_remove(encodeFilename(ctx['fragment_filename_sanitized'])) del ctx['fragment_filename_sanitized'] def _prepare_frag_download(self, ctx): if not ctx.setdefault('live', False): total_frags_str = '%d' % ctx['total_frags'] ad_frags = ctx.get('ad_frags', 0) if ad_frags: total_frags_str += ' (not including %d ad)' % ad_frags else: total_frags_str = 'unknown (live)' self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}') self.report_destination(ctx['filename']) dl = HttpQuietDownloader(self.ydl, { **self.params, 'noprogress': True, 'test': False, 
'sleep_interval': 0, 'max_sleep_interval': 0, 'sleep_interval_subtitles': 0, }) tmpfilename = self.temp_name(ctx['filename']) open_mode = 'wb' # Establish possible resume length resume_len = self.filesize_or_none(tmpfilename) if resume_len > 0: open_mode = 'ab' # Should be initialized before ytdl file check ctx.update({ 'tmpfilename': tmpfilename, 'fragment_index': 0, }) if self.__do_ytdl_file(ctx): ytdl_file_exists = os.path.isfile(encodeFilename(self.ytdl_filename(ctx['filename']))) continuedl = self.params.get('continuedl', True) if continuedl and ytdl_file_exists: self._read_ytdl_file(ctx) is_corrupt = ctx.get('ytdl_corrupt') is True is_inconsistent = ctx['fragment_index'] > 0 and resume_len == 0 if is_corrupt or is_inconsistent: message = ( '.ytdl file is corrupt' if is_corrupt else 'Inconsistent state of incomplete fragment download') self.report_warning( f'{message}. Restarting from the beginning ...') ctx['fragment_index'] = resume_len = 0 if 'ytdl_corrupt' in ctx: del ctx['ytdl_corrupt'] self._write_ytdl_file(ctx) else: if not continuedl: if ytdl_file_exists: self._read_ytdl_file(ctx) ctx['fragment_index'] = resume_len = 0 self._write_ytdl_file(ctx) assert ctx['fragment_index'] == 0 dest_stream, tmpfilename = self.sanitize_open(tmpfilename, open_mode) ctx.update({ 'dl': dl, 'dest_stream': dest_stream, 'tmpfilename': tmpfilename, # Total complete fragments downloaded so far in bytes 'complete_frags_downloaded_bytes': resume_len, }) def _start_frag_download(self, ctx, info_dict): resume_len = ctx['complete_frags_downloaded_bytes'] total_frags = ctx['total_frags'] ctx_id = ctx.get('ctx_id') # Stores the download progress, updated by the progress hook state = { 'status': 'downloading', 'downloaded_bytes': resume_len, 'fragment_index': ctx['fragment_index'], 'fragment_count': total_frags, 'filename': ctx['filename'], 'tmpfilename': ctx['tmpfilename'], } ctx['started'] = time.time() progress = ProgressCalculator(resume_len) def frag_progress_hook(s): if s['status'] not in ('downloading', 'finished'): return if not total_frags and ctx.get('fragment_count'): state['fragment_count'] = ctx['fragment_count'] if ctx_id is not None and s.get('ctx_id') != ctx_id: return state['max_progress'] = ctx.get('max_progress') state['progress_idx'] = ctx.get('progress_idx') state['elapsed'] = progress.elapsed frag_total_bytes = s.get('total_bytes') or 0 s['fragment_info_dict'] = s.pop('info_dict', {}) # XXX: Fragment resume is not accounted for here if not ctx['live']: estimated_size = ( (ctx['complete_frags_downloaded_bytes'] + frag_total_bytes) / (state['fragment_index'] + 1) * total_frags) progress.total = estimated_size progress.update(s.get('downloaded_bytes')) state['total_bytes_estimate'] = progress.total else: progress.update(s.get('downloaded_bytes')) if s['status'] == 'finished': state['fragment_index'] += 1 ctx['fragment_index'] = state['fragment_index'] progress.thread_reset() state['downloaded_bytes'] = ctx['complete_frags_downloaded_bytes'] = progress.downloaded state['speed'] = ctx['speed'] = progress.speed.smooth state['eta'] = progress.eta.smooth self._hook_progress(state, info_dict) ctx['dl'].add_progress_hook(frag_progress_hook) return ctx['started'] def _finish_frag_download(self, ctx, info_dict): ctx['dest_stream'].close() if self.__do_ytdl_file(ctx): self.try_remove(self.ytdl_filename(ctx['filename'])) elapsed = time.time() - ctx['started'] to_file = ctx['tmpfilename'] != '-' if to_file: downloaded_bytes = self.filesize_or_none(ctx['tmpfilename']) else: downloaded_bytes = 
ctx['complete_frags_downloaded_bytes'] if not downloaded_bytes: if to_file: self.try_remove(ctx['tmpfilename']) self.report_error('The downloaded file is empty') return False elif to_file: self.try_rename(ctx['tmpfilename'], ctx['filename']) filetime = ctx.get('fragment_filetime') if self.params.get('updatetime', True) and filetime: with contextlib.suppress(Exception): os.utime(ctx['filename'], (time.time(), filetime)) self._hook_progress({ 'downloaded_bytes': downloaded_bytes, 'total_bytes': downloaded_bytes, 'filename': ctx['filename'], 'status': 'finished', 'elapsed': elapsed, 'ctx_id': ctx.get('ctx_id'), 'max_progress': ctx.get('max_progress'), 'progress_idx': ctx.get('progress_idx'), }, info_dict) return True def _prepare_external_frag_download(self, ctx): if 'live' not in ctx: ctx['live'] = False if not ctx['live']: total_frags_str = '%d' % ctx['total_frags'] ad_frags = ctx.get('ad_frags', 0) if ad_frags: total_frags_str += ' (not including %d ad)' % ad_frags else: total_frags_str = 'unknown (live)' self.to_screen(f'[{self.FD_NAME}] Total fragments: {total_frags_str}') tmpfilename = self.temp_name(ctx['filename']) # Should be initialized before ytdl file check ctx.update({ 'tmpfilename': tmpfilename, 'fragment_index': 0, }) def decrypter(self, info_dict): _key_cache = {} def _get_key(url): if url not in _key_cache: _key_cache[url] = self.ydl.urlopen(self._prepare_url(info_dict, url)).read() return _key_cache[url] def decrypt_fragment(fragment, frag_content): if frag_content is None: return decrypt_info = fragment.get('decrypt_info') if not decrypt_info or decrypt_info['METHOD'] != 'AES-128': return frag_content iv = decrypt_info.get('IV') or struct.pack('>8xq', fragment['media_sequence']) decrypt_info['KEY'] = (decrypt_info.get('KEY') or _get_key(traverse_obj(info_dict, ('hls_aes', 'uri')) or decrypt_info['URI'])) # Don't decrypt the content in tests since the data is explicitly truncated and it's not to a valid block # size (see https://github.com/ytdl-org/youtube-dl/pull/27660). Tests only care that the correct data downloaded, # not what it decrypts to. if self.params.get('test', False): return frag_content return unpad_pkcs7(aes_cbc_decrypt_bytes(frag_content, decrypt_info['KEY'], iv)) return decrypt_fragment def download_and_append_fragments_multiple(self, *args, **kwargs): """ @params (ctx1, fragments1, info_dict1), (ctx2, fragments2, info_dict2), ... 
all args must be either tuple or list """ interrupt_trigger = [True] max_progress = len(args) if max_progress == 1: return self.download_and_append_fragments(*args[0], **kwargs) max_workers = self.params.get('concurrent_fragment_downloads', 1) if max_progress > 1: self._prepare_multiline_status(max_progress) is_live = any(traverse_obj(args, (..., 2, 'is_live'))) def thread_func(idx, ctx, fragments, info_dict, tpe): ctx['max_progress'] = max_progress ctx['progress_idx'] = idx return self.download_and_append_fragments( ctx, fragments, info_dict, **kwargs, tpe=tpe, interrupt_trigger=interrupt_trigger) class FTPE(concurrent.futures.ThreadPoolExecutor): # has to stop this or it's going to wait on the worker thread itself def __exit__(self, exc_type, exc_val, exc_tb): pass if compat_os_name == 'nt': def future_result(future): while True: try: return future.result(0.1) except KeyboardInterrupt: raise except concurrent.futures.TimeoutError: continue else: def future_result(future): return future.result() def interrupt_trigger_iter(fg): for f in fg: if not interrupt_trigger[0]: break yield f spins = [] for idx, (ctx, fragments, info_dict) in enumerate(args): tpe = FTPE(math.ceil(max_workers / max_progress)) job = tpe.submit(thread_func, idx, ctx, interrupt_trigger_iter(fragments), info_dict, tpe) spins.append((tpe, job)) result = True for tpe, job in spins: try: result = result and future_result(job) except KeyboardInterrupt: interrupt_trigger[0] = False finally: tpe.shutdown(wait=True) if not interrupt_trigger[0] and not is_live: raise KeyboardInterrupt # we expect the user wants to stop and DO WANT the preceding postprocessors to run; # so returning a intermediate result here instead of KeyboardInterrupt on live return result def download_and_append_fragments( self, ctx, fragments, info_dict, *, is_fatal=(lambda idx: False), pack_func=(lambda content, idx: content), finish_func=None, tpe=None, interrupt_trigger=(True, )): if not self.params.get('skip_unavailable_fragments', True): is_fatal = lambda _: True def download_fragment(fragment, ctx): if not interrupt_trigger[0]: return frag_index = ctx['fragment_index'] = fragment['frag_index'] ctx['last_error'] = None headers = HTTPHeaderDict(info_dict.get('http_headers')) byte_range = fragment.get('byte_range') if byte_range: headers['Range'] = 'bytes=%d-%d' % (byte_range['start'], byte_range['end'] - 1) # Never skip the first fragment fatal = is_fatal(fragment.get('index') or (frag_index - 1)) def error_callback(err, count, retries): if fatal and count > retries: ctx['dest_stream'].close() self.report_retry(err, count, retries, frag_index, fatal) ctx['last_error'] = err for retry in RetryManager(self.params.get('fragment_retries'), error_callback): try: ctx['fragment_count'] = fragment.get('fragment_count') if not self._download_fragment( ctx, fragment['url'], info_dict, headers, info_dict.get('request_data')): return except (HTTPError, IncompleteRead) as err: retry.error = err continue except DownloadError: # has own retry settings if fatal: raise def append_fragment(frag_content, frag_index, ctx): if frag_content: self._append_fragment(ctx, pack_func(frag_content, frag_index)) elif not is_fatal(frag_index - 1): self.report_skip_fragment(frag_index, 'fragment not found') else: ctx['dest_stream'].close() self.report_error(f'fragment {frag_index} not found, unable to continue') return False return True decrypt_fragment = self.decrypter(info_dict) max_workers = math.ceil( self.params.get('concurrent_fragment_downloads', 1) / ctx.get('max_progress', 1)) 
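# --- Illustrative sketch, not part of yt-dlp: the ordered fan-out pattern the
# threaded branch below relies on. ThreadPoolExecutor.map() runs the fetches
# concurrently but yields results in submission order, so fragments can be
# appended to the output sequentially even when they finish out of order.
# fetch() and the fragment dicts here are stand-ins invented for the demo,
# not the real _download_fragment machinery.
import concurrent.futures
import math


def fetch(fragment):
    # placeholder for a per-fragment HTTP download; returns the fragment body
    return b'frag-%d' % fragment['frag_index']


def download_ordered(fragments, total_workers=4, progress_slots=1):
    # split one global worker budget across parallel downloads, mirroring the
    # math.ceil(concurrent_fragment_downloads / max_progress) division above
    workers = math.ceil(total_workers / progress_slots)
    parts = []
    with concurrent.futures.ThreadPoolExecutor(workers) as pool:
        for body in pool.map(fetch, fragments):  # arrives in fragment order
            parts.append(body)
    return b''.join(parts)


assert download_ordered([{'frag_index': i} for i in range(1, 4)]) == b'frag-1frag-2frag-3'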
if max_workers > 1: def _download_fragment(fragment): ctx_copy = ctx.copy() download_fragment(fragment, ctx_copy) return fragment, fragment['frag_index'], ctx_copy.get('fragment_filename_sanitized') with tpe or concurrent.futures.ThreadPoolExecutor(max_workers) as pool: try: for fragment, frag_index, frag_filename in pool.map(_download_fragment, fragments): ctx.update({ 'fragment_filename_sanitized': frag_filename, 'fragment_index': frag_index, }) if not append_fragment(decrypt_fragment(fragment, self._read_fragment(ctx)), frag_index, ctx): return False except KeyboardInterrupt: self._finish_multiline_status() self.report_error( 'Interrupted by user. Waiting for all threads to shutdown...', is_error=False, tb=False) pool.shutdown(wait=False) raise else: for fragment in fragments: if not interrupt_trigger[0]: break try: download_fragment(fragment, ctx) result = append_fragment( decrypt_fragment(fragment, self._read_fragment(ctx)), fragment['frag_index'], ctx) except KeyboardInterrupt: if info_dict.get('is_live'): break raise if not result: return False if finish_func is not None: ctx['dest_stream'].write(finish_func()) ctx['dest_stream'].flush() return self._finish_frag_download(ctx, info_dict) yt-dlp-2024.09.27/yt_dlp/downloader/hls.py000066400000000000000000000433671467563447100201610ustar00rootroot00000000000000import binascii import io import re import urllib.parse from . import get_suitable_downloader from .external import FFmpegFD from .fragment import FragmentFD from .. import webvtt from ..dependencies import Cryptodome from ..utils import ( bug_reports_message, parse_m3u8_attributes, remove_start, traverse_obj, update_url_query, urljoin, ) class HlsFD(FragmentFD): """ Download segments in a m3u8 manifest. External downloaders can take over the fragment downloads by supporting the 'm3u8_frag_urls' protocol and re-defining 'supports_manifest' function """ FD_NAME = 'hlsnative' @staticmethod def _has_drm(manifest): # TODO: https://github.com/yt-dlp/yt-dlp/pull/5039 return bool(re.search('|'.join(( r'#EXT-X-(?:SESSION-)?KEY:.*?URI="skd://', # Apple FairPlay r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.apple\.streamingkeydelivery"', # Apple FairPlay r'#EXT-X-(?:SESSION-)?KEY:.*?KEYFORMAT="com\.microsoft\.playready"', # Microsoft PlayReady r'#EXT-X-FAXS-CM:', # Adobe Flash Access )), manifest)) @classmethod def can_download(cls, manifest, info_dict, allow_unplayable_formats=False): UNSUPPORTED_FEATURES = [ # r'#EXT-X-BYTERANGE', # playlists composed of byte ranges of media files [2] # Live streams heuristic does not always work (e.g. geo restricted to Germany # http://hls-geo.daserste.de/i/videoportal/Film/c_620000/622873/format,716451,716457,716450,716458,716459,.mp4.csmil/index_4_av.m3u8?null=0) # r'#EXT-X-MEDIA-SEQUENCE:(?!0$)', # live streams [3] # This heuristic also is not correct since segments may not be appended as well. # Twitch vods of finished streams have EXT-X-PLAYLIST-TYPE:EVENT despite # no segments will definitely be appended to the end of the playlist. # r'#EXT-X-PLAYLIST-TYPE:EVENT', # media segments may be appended to the end of # # event media playlists [4] # r'#EXT-X-MAP:', # media initialization [5] # 1. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.4 # 2. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.2 # 3. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.2 # 4. https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.3.5 # 5. 
https://tools.ietf.org/html/draft-pantos-http-live-streaming-17#section-4.3.2.5 ] if not allow_unplayable_formats: UNSUPPORTED_FEATURES += [ r'#EXT-X-KEY:METHOD=(?!NONE|AES-128)', # encrypted streams [1], but not necessarily DRM ] def check_results(): yield not info_dict.get('is_live') for feature in UNSUPPORTED_FEATURES: yield not re.search(feature, manifest) if not allow_unplayable_formats: yield not cls._has_drm(manifest) return all(check_results()) def real_download(self, filename, info_dict): man_url = info_dict['url'] self.to_screen(f'[{self.FD_NAME}] Downloading m3u8 manifest') urlh = self.ydl.urlopen(self._prepare_url(info_dict, man_url)) man_url = urlh.url s = urlh.read().decode('utf-8', 'ignore') can_download, message = self.can_download(s, info_dict, self.params.get('allow_unplayable_formats')), None if can_download: has_ffmpeg = FFmpegFD.available() no_crypto = not Cryptodome.AES and '#EXT-X-KEY:METHOD=AES-128' in s if no_crypto and has_ffmpeg: can_download, message = False, 'The stream has AES-128 encryption and pycryptodomex is not available' elif no_crypto: message = ('The stream has AES-128 encryption and neither ffmpeg nor pycryptodomex are available; ' 'Decryption will be performed natively, but will be extremely slow') elif info_dict.get('extractor_key') == 'Generic' and re.search(r'(?m)#EXT-X-MEDIA-SEQUENCE:(?!0$)', s): install_ffmpeg = '' if has_ffmpeg else 'install ffmpeg and ' message = ('Live HLS streams are not supported by the native downloader. If this is a livestream, ' f'please {install_ffmpeg}add "--downloader ffmpeg --hls-use-mpegts" to your command') if not can_download: if self._has_drm(s) and not self.params.get('allow_unplayable_formats'): if info_dict.get('has_drm') and self.params.get('test'): self.to_screen(f'[{self.FD_NAME}] This format is DRM protected', skip_eol=True) else: self.report_error( 'This format is DRM protected; Try selecting another format with --format or ' 'add --check-formats to automatically fallback to the next best format', tb=False) return False message = message or 'Unsupported features have been detected' fd = FFmpegFD(self.ydl, self.params) self.report_warning(f'{message}; extraction will be delegated to {fd.get_basename()}') return fd.real_download(filename, info_dict) elif message: self.report_warning(message) is_webvtt = info_dict['ext'] == 'vtt' if is_webvtt: real_downloader = None # Packing the fragments is not currently supported for external downloader else: real_downloader = get_suitable_downloader( info_dict, self.params, None, protocol='m3u8_frag_urls', to_stdout=(filename == '-')) if real_downloader and not real_downloader.supports_manifest(s): real_downloader = None if real_downloader: self.to_screen(f'[{self.FD_NAME}] Fragment downloads will be delegated to {real_downloader.get_basename()}') def is_ad_fragment_start(s): return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=ad' in s or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',ad')) def is_ad_fragment_end(s): return (s.startswith('#ANVATO-SEGMENT-INFO') and 'type=master' in s or s.startswith('#UPLYNK-SEGMENT') and s.endswith(',segment')) fragments = [] media_frags = 0 ad_frags = 0 ad_frag_next = False for line in s.splitlines(): line = line.strip() if not line: continue if line.startswith('#'): if is_ad_fragment_start(line): ad_frag_next = True elif is_ad_fragment_end(line): ad_frag_next = False continue if ad_frag_next: ad_frags += 1 continue media_frags += 1 ctx = { 'filename': filename, 'total_frags': media_frags, 'ad_frags': ad_frags, } if 
real_downloader: self._prepare_external_frag_download(ctx) else: self._prepare_and_start_frag_download(ctx, info_dict) extra_state = ctx.setdefault('extra_state', {}) format_index = info_dict.get('format_index') extra_segment_query = None if extra_param_to_segment_url := info_dict.get('extra_param_to_segment_url'): extra_segment_query = urllib.parse.parse_qs(extra_param_to_segment_url) extra_key_query = None if extra_param_to_key_url := info_dict.get('extra_param_to_key_url'): extra_key_query = urllib.parse.parse_qs(extra_param_to_key_url) i = 0 media_sequence = 0 decrypt_info = {'METHOD': 'NONE'} external_aes_key = traverse_obj(info_dict, ('hls_aes', 'key')) if external_aes_key: external_aes_key = binascii.unhexlify(remove_start(external_aes_key, '0x')) assert len(external_aes_key) in (16, 24, 32), 'Invalid length for HLS AES-128 key' external_aes_iv = traverse_obj(info_dict, ('hls_aes', 'iv')) if external_aes_iv: external_aes_iv = binascii.unhexlify(remove_start(external_aes_iv, '0x').zfill(32)) byte_range = {} discontinuity_count = 0 frag_index = 0 ad_frag_next = False for line in s.splitlines(): line = line.strip() if line: if not line.startswith('#'): if format_index and discontinuity_count != format_index: continue if ad_frag_next: continue frag_index += 1 if frag_index <= ctx['fragment_index']: continue frag_url = urljoin(man_url, line) if extra_segment_query: frag_url = update_url_query(frag_url, extra_segment_query) fragments.append({ 'frag_index': frag_index, 'url': frag_url, 'decrypt_info': decrypt_info, 'byte_range': byte_range, 'media_sequence': media_sequence, }) media_sequence += 1 elif line.startswith('#EXT-X-MAP'): if format_index and discontinuity_count != format_index: continue if frag_index > 0: self.report_error( 'Initialization fragment found after media fragments, unable to download') return False frag_index += 1 map_info = parse_m3u8_attributes(line[11:]) frag_url = urljoin(man_url, map_info.get('URI')) if extra_segment_query: frag_url = update_url_query(frag_url, extra_segment_query) if map_info.get('BYTERANGE'): splitted_byte_range = map_info.get('BYTERANGE').split('@') sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] byte_range = { 'start': sub_range_start, 'end': sub_range_start + int(splitted_byte_range[0]), } fragments.append({ 'frag_index': frag_index, 'url': frag_url, 'decrypt_info': decrypt_info, 'byte_range': byte_range, 'media_sequence': media_sequence, }) media_sequence += 1 elif line.startswith('#EXT-X-KEY'): decrypt_url = decrypt_info.get('URI') decrypt_info = parse_m3u8_attributes(line[11:]) if decrypt_info['METHOD'] == 'AES-128': if external_aes_iv: decrypt_info['IV'] = external_aes_iv elif 'IV' in decrypt_info: decrypt_info['IV'] = binascii.unhexlify(decrypt_info['IV'][2:].zfill(32)) if external_aes_key: decrypt_info['KEY'] = external_aes_key else: decrypt_info['URI'] = urljoin(man_url, decrypt_info['URI']) if extra_key_query or extra_segment_query: # Fall back to extra_segment_query to key for backwards compat decrypt_info['URI'] = update_url_query( decrypt_info['URI'], extra_key_query or extra_segment_query) if decrypt_url != decrypt_info['URI']: decrypt_info['KEY'] = None elif line.startswith('#EXT-X-MEDIA-SEQUENCE'): media_sequence = int(line[22:]) elif line.startswith('#EXT-X-BYTERANGE'): splitted_byte_range = line[17:].split('@') sub_range_start = int(splitted_byte_range[1]) if len(splitted_byte_range) == 2 else byte_range['end'] byte_range = { 'start': sub_range_start, 'end': 
sub_range_start + int(splitted_byte_range[0]), } elif is_ad_fragment_start(line): ad_frag_next = True elif is_ad_fragment_end(line): ad_frag_next = False elif line.startswith('#EXT-X-DISCONTINUITY'): discontinuity_count += 1 i += 1 # We only download the first fragment during the test if self.params.get('test', False): fragments = [fragments[0] if fragments else None] if real_downloader: info_dict['fragments'] = fragments fd = real_downloader(self.ydl, self.params) # TODO: Make progress updates work without hooking twice # for ph in self._progress_hooks: # fd.add_progress_hook(ph) return fd.real_download(filename, info_dict) if is_webvtt: def pack_fragment(frag_content, frag_index): output = io.StringIO() adjust = 0 overflow = False mpegts_last = None for block in webvtt.parse_fragment(frag_content): if isinstance(block, webvtt.CueBlock): extra_state['webvtt_mpegts_last'] = mpegts_last if overflow: extra_state['webvtt_mpegts_adjust'] += 1 overflow = False block.start += adjust block.end += adjust dedup_window = extra_state.setdefault('webvtt_dedup_window', []) ready = [] i = 0 is_new = True while i < len(dedup_window): wcue = dedup_window[i] wblock = webvtt.CueBlock.from_json(wcue) i += 1 if wblock.hinges(block): wcue['end'] = block.end is_new = False continue if wblock == block: is_new = False continue if wblock.end > block.start: continue ready.append(wblock) i -= 1 del dedup_window[i] if is_new: dedup_window.append(block.as_json) for block in ready: block.write_into(output) # we only emit cues once they fall out of the duplicate window continue elif isinstance(block, webvtt.Magic): # take care of MPEG PES timestamp overflow if block.mpegts is None: block.mpegts = 0 extra_state.setdefault('webvtt_mpegts_adjust', 0) block.mpegts += extra_state['webvtt_mpegts_adjust'] << 33 if block.mpegts < extra_state.get('webvtt_mpegts_last', 0): overflow = True block.mpegts += 1 << 33 mpegts_last = block.mpegts if frag_index == 1: extra_state['webvtt_mpegts'] = block.mpegts or 0 extra_state['webvtt_local'] = block.local or 0 # XXX: block.local = block.mpegts = None ? 
else: if block.mpegts is not None and block.local is not None: adjust = ( (block.mpegts - extra_state.get('webvtt_mpegts', 0)) - (block.local - extra_state.get('webvtt_local', 0)) ) continue elif isinstance(block, webvtt.HeaderBlock): if frag_index != 1: # XXX: this should probably be silent as well # or verify that all segments contain the same data self.report_warning(bug_reports_message( f'Discarding a {type(block).__name__} block found in the middle of the stream; ' 'if the subtitles display incorrectly,')) continue block.write_into(output) return output.getvalue().encode() def fin_fragments(): dedup_window = extra_state.get('webvtt_dedup_window') if not dedup_window: return b'' output = io.StringIO() for cue in dedup_window: webvtt.CueBlock.from_json(cue).write_into(output) return output.getvalue().encode() if len(fragments) == 1: self.download_and_append_fragments(ctx, fragments, info_dict) else: self.download_and_append_fragments( ctx, fragments, info_dict, pack_func=pack_fragment, finish_func=fin_fragments) else: return self.download_and_append_fragments(ctx, fragments, info_dict) yt-dlp-2024.09.27/yt_dlp/downloader/http.py000066400000000000000000000407001467563447100203360ustar00rootroot00000000000000import os import random import time from .common import FileDownloader from ..networking import Request from ..networking.exceptions import ( CertificateVerifyError, HTTPError, TransportError, ) from ..utils import ( ContentTooShortError, RetryManager, ThrottledDownload, XAttrMetadataError, XAttrUnavailableError, encodeFilename, int_or_none, parse_http_range, try_call, write_xattr, ) from ..utils.networking import HTTPHeaderDict class HttpFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] request_data = info_dict.get('request_data', None) class DownloadContext(dict): __getattr__ = dict.get __setattr__ = dict.__setitem__ __delattr__ = dict.__delitem__ ctx = DownloadContext() ctx.filename = filename ctx.tmpfilename = self.temp_name(filename) ctx.stream = None # Disable compression headers = HTTPHeaderDict({'Accept-Encoding': 'identity'}, info_dict.get('http_headers')) is_test = self.params.get('test', False) chunk_size = self._TEST_FILE_SIZE if is_test else ( self.params.get('http_chunk_size') or info_dict.get('downloader_options', {}).get('http_chunk_size') or 0) ctx.open_mode = 'wb' ctx.resume_len = 0 ctx.block_size = self.params.get('buffersize', 1024) ctx.start_time = time.time() # parse given Range req_start, req_end, _ = parse_http_range(headers.get('Range')) if self.params.get('continuedl', True): # Establish possible resume length if os.path.isfile(encodeFilename(ctx.tmpfilename)): ctx.resume_len = os.path.getsize( encodeFilename(ctx.tmpfilename)) ctx.is_resume = ctx.resume_len > 0 class SucceedDownload(Exception): pass class RetryDownload(Exception): def __init__(self, source_error): self.source_error = source_error class NextFragment(Exception): pass def establish_connection(): ctx.chunk_size = (random.randint(int(chunk_size * 0.95), chunk_size) if not is_test and chunk_size else chunk_size) if ctx.resume_len > 0: range_start = ctx.resume_len if req_start is not None: # offset the beginning of Range to be within request range_start += req_start if ctx.is_resume: self.report_resuming_byte(ctx.resume_len) ctx.open_mode = 'ab' elif req_start is not None: range_start = req_start elif ctx.chunk_size > 0: range_start = 0 else: range_start = None ctx.is_resume = False if ctx.chunk_size: chunk_aware_end = range_start + ctx.chunk_size - 1 # we're 
not allowed to download outside Range range_end = chunk_aware_end if req_end is None else min(chunk_aware_end, req_end) elif req_end is not None: # there's no need for chunked downloads, so download until the end of Range range_end = req_end else: range_end = None if try_call(lambda: range_start > range_end): ctx.resume_len = 0 ctx.open_mode = 'wb' raise RetryDownload(Exception(f'Conflicting range. (start={range_start} > end={range_end})')) if try_call(lambda: range_end >= ctx.content_len): range_end = ctx.content_len - 1 request = Request(url, request_data, headers) has_range = range_start is not None if has_range: request.headers['Range'] = f'bytes={int(range_start)}-{int_or_none(range_end) or ""}' # Establish connection try: ctx.data = self.ydl.urlopen(request) # When trying to resume, Content-Range HTTP header of response has to be checked # to match the value of requested Range HTTP header. This is due to a webservers # that don't support resuming and serve a whole file with no Content-Range # set in response despite of requested Range (see # https://github.com/ytdl-org/youtube-dl/issues/6057#issuecomment-126129799) if has_range: content_range = ctx.data.headers.get('Content-Range') content_range_start, content_range_end, content_len = parse_http_range(content_range) # Content-Range is present and matches requested Range, resume is possible if range_start == content_range_start and ( # Non-chunked download not ctx.chunk_size # Chunked download and requested piece or # its part is promised to be served or content_range_end == range_end or content_len < range_end): ctx.content_len = content_len if content_len or req_end: ctx.data_len = min(content_len or req_end, req_end or content_len) - (req_start or 0) return # Content-Range is either not present or invalid. Assuming remote webserver is # trying to send the whole file, resume is not possible, so wiping the local file # and performing entire redownload elif range_start > 0: self.report_unable_to_resume() ctx.resume_len = 0 ctx.open_mode = 'wb' ctx.data_len = ctx.content_len = int_or_none(ctx.data.headers.get('Content-length', None)) except HTTPError as err: if err.status == 416: # Unable to resume (requested range not satisfiable) try: # Open the connection again without the range header ctx.data = self.ydl.urlopen( Request(url, request_data, headers)) content_length = ctx.data.headers['Content-Length'] except HTTPError as err: if err.status < 500 or err.status >= 600: raise else: # Examine the reported length if (content_length is not None and (ctx.resume_len - 100 < int(content_length) < ctx.resume_len + 100)): # The file had already been fully downloaded. # Explanation to the above condition: in issue #175 it was revealed that # YouTube sometimes adds or removes a few bytes from the end of the file, # changing the file size slightly and causing problems for some users. So # I decided to implement a suggested change and consider the file # completely downloaded if the file size differs less than 100 bytes from # the one in the hard drive. 
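# --- Illustrative sketch, not part of yt-dlp: the ~100-byte tolerance test
# described in the comment above, pulled out as a standalone predicate. The
# function name and the sample values are invented for the example; the
# comparison itself mirrors the resume check performed just above.
def looks_fully_downloaded(resume_len, content_length, tolerance=100):
    if content_length is None:
        return False
    return resume_len - tolerance < int(content_length) < resume_len + tolerance


assert looks_fully_downloaded(1000, '1005')       # within 100 bytes: treat as done
assert not looks_fully_downloaded(1000, '2000')   # size mismatch: redownload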
self.report_file_already_downloaded(ctx.filename) self.try_rename(ctx.tmpfilename, ctx.filename) self._hook_progress({ 'filename': ctx.filename, 'status': 'finished', 'downloaded_bytes': ctx.resume_len, 'total_bytes': ctx.resume_len, }, info_dict) raise SucceedDownload else: # The length does not match, we start the download over self.report_unable_to_resume() ctx.resume_len = 0 ctx.open_mode = 'wb' return elif err.status < 500 or err.status >= 600: # Unexpected HTTP error raise raise RetryDownload(err) except CertificateVerifyError: raise except TransportError as err: raise RetryDownload(err) def close_stream(): if ctx.stream is not None: if ctx.tmpfilename != '-': ctx.stream.close() ctx.stream = None def download(): data_len = ctx.data.headers.get('Content-length') if ctx.data.headers.get('Content-encoding'): # Content-encoding is present, Content-length is not reliable anymore as we are # doing auto decompression. (See: https://github.com/yt-dlp/yt-dlp/pull/6176) data_len = None # Range HTTP header may be ignored/unsupported by a webserver # (e.g. extractor/scivee.py, extractor/bambuser.py). # However, for a test we still would like to download just a piece of a file. # To achieve this we limit data_len to _TEST_FILE_SIZE and manually control # block size when downloading a file. if is_test and (data_len is None or int(data_len) > self._TEST_FILE_SIZE): data_len = self._TEST_FILE_SIZE if data_len is not None: data_len = int(data_len) + ctx.resume_len min_data_len = self.params.get('min_filesize') max_data_len = self.params.get('max_filesize') if min_data_len is not None and data_len < min_data_len: self.to_screen( f'\r[download] File is smaller than min-filesize ({data_len} bytes < {min_data_len} bytes). Aborting.') return False if max_data_len is not None and data_len > max_data_len: self.to_screen( f'\r[download] File is larger than max-filesize ({data_len} bytes > {max_data_len} bytes). 
Aborting.') return False byte_counter = 0 + ctx.resume_len block_size = ctx.block_size start = time.time() # measure time over whole while-loop, so slow_down() and best_block_size() work together properly now = None # needed for slow_down() in the first loop run before = start # start measuring def retry(e): close_stream() if ctx.tmpfilename == '-': ctx.resume_len = byte_counter else: try: ctx.resume_len = os.path.getsize(encodeFilename(ctx.tmpfilename)) except FileNotFoundError: ctx.resume_len = 0 raise RetryDownload(e) while True: try: # Download and write data_block = ctx.data.read(block_size if not is_test else min(block_size, data_len - byte_counter)) except TransportError as err: retry(err) byte_counter += len(data_block) # exit loop when download is finished if len(data_block) == 0: break # Open destination file just in time if ctx.stream is None: try: ctx.stream, ctx.tmpfilename = self.sanitize_open( ctx.tmpfilename, ctx.open_mode) assert ctx.stream is not None ctx.filename = self.undo_temp_name(ctx.tmpfilename) self.report_destination(ctx.filename) except OSError as err: self.report_error(f'unable to open for writing: {err}') return False if self.params.get('xattr_set_filesize', False) and data_len is not None: try: write_xattr(ctx.tmpfilename, 'user.ytdl.filesize', str(data_len).encode()) except (XAttrUnavailableError, XAttrMetadataError) as err: self.report_error(f'unable to set filesize xattr: {err}') try: ctx.stream.write(data_block) except OSError as err: self.to_stderr('\n') self.report_error(f'unable to write data: {err}') return False # Apply rate limit self.slow_down(start, now, byte_counter - ctx.resume_len) # end measuring of one loop run now = time.time() after = now # Adjust block size if not self.params.get('noresizebuffer', False): block_size = self.best_block_size(after - before, len(data_block)) before = after # Progress message speed = self.calc_speed(start, now, byte_counter - ctx.resume_len) if ctx.data_len is None: eta = None else: eta = self.calc_eta(start, time.time(), ctx.data_len - ctx.resume_len, byte_counter - ctx.resume_len) self._hook_progress({ 'status': 'downloading', 'downloaded_bytes': byte_counter, 'total_bytes': ctx.data_len, 'tmpfilename': ctx.tmpfilename, 'filename': ctx.filename, 'eta': eta, 'speed': speed, 'elapsed': now - ctx.start_time, 'ctx_id': info_dict.get('ctx_id'), }, info_dict) if data_len is not None and byte_counter == data_len: break if speed and speed < (self.params.get('throttledratelimit') or 0): # The speed must stay below the limit for 3 seconds # This prevents raising error when the speed temporarily goes down if ctx.throttle_start is None: ctx.throttle_start = now elif now - ctx.throttle_start > 3: if ctx.stream is not None and ctx.tmpfilename != '-': ctx.stream.close() raise ThrottledDownload elif speed: ctx.throttle_start = None if ctx.stream is None: self.to_stderr('\n') self.report_error('Did not get any data blocks') return False if not is_test and ctx.chunk_size and ctx.content_len is not None and byte_counter < ctx.content_len: ctx.resume_len = byte_counter raise NextFragment if ctx.tmpfilename != '-': ctx.stream.close() if data_len is not None and byte_counter != data_len: err = ContentTooShortError(byte_counter, int(data_len)) retry(err) self.try_rename(ctx.tmpfilename, ctx.filename) # Update file modification time if self.params.get('updatetime', True): info_dict['filetime'] = self.try_utime(ctx.filename, ctx.data.headers.get('last-modified', None)) self._hook_progress({ 'downloaded_bytes': byte_counter, 
'total_bytes': byte_counter, 'filename': ctx.filename, 'status': 'finished', 'elapsed': time.time() - ctx.start_time, 'ctx_id': info_dict.get('ctx_id'), }, info_dict) return True for retry in RetryManager(self.params.get('retries'), self.report_retry): try: establish_connection() return download() except RetryDownload as err: retry.error = err.source_error continue except NextFragment: retry.error = None retry.attempt -= 1 continue except SucceedDownload: return True except: # noqa: E722 close_stream() raise return False yt-dlp-2024.09.27/yt_dlp/downloader/ism.py000066400000000000000000000265561467563447100201640ustar00rootroot00000000000000import binascii import io import struct import time from .fragment import FragmentFD from ..networking.exceptions import HTTPError from ..utils import RetryManager u8 = struct.Struct('>B') u88 = struct.Struct('>Bx') u16 = struct.Struct('>H') u1616 = struct.Struct('>Hxx') u32 = struct.Struct('>I') u64 = struct.Struct('>Q') s88 = struct.Struct('>bx') s16 = struct.Struct('>h') s1616 = struct.Struct('>hxx') s32 = struct.Struct('>i') unity_matrix = (s32.pack(0x10000) + s32.pack(0) * 3) * 2 + s32.pack(0x40000000) TRACK_ENABLED = 0x1 TRACK_IN_MOVIE = 0x2 TRACK_IN_PREVIEW = 0x4 SELF_CONTAINED = 0x1 def box(box_type, payload): return u32.pack(8 + len(payload)) + box_type + payload def full_box(box_type, version, flags, payload): return box(box_type, u8.pack(version) + u32.pack(flags)[1:] + payload) def write_piff_header(stream, params): track_id = params['track_id'] fourcc = params['fourcc'] duration = params['duration'] timescale = params.get('timescale', 10000000) language = params.get('language', 'und') height = params.get('height', 0) width = params.get('width', 0) stream_type = params['stream_type'] creation_time = modification_time = int(time.time()) ftyp_payload = b'isml' # major brand ftyp_payload += u32.pack(1) # minor version ftyp_payload += b'piff' + b'iso2' # compatible brands stream.write(box(b'ftyp', ftyp_payload)) # File Type Box mvhd_payload = u64.pack(creation_time) mvhd_payload += u64.pack(modification_time) mvhd_payload += u32.pack(timescale) mvhd_payload += u64.pack(duration) mvhd_payload += s1616.pack(1) # rate mvhd_payload += s88.pack(1) # volume mvhd_payload += u16.pack(0) # reserved mvhd_payload += u32.pack(0) * 2 # reserved mvhd_payload += unity_matrix mvhd_payload += u32.pack(0) * 6 # pre defined mvhd_payload += u32.pack(0xffffffff) # next track id moov_payload = full_box(b'mvhd', 1, 0, mvhd_payload) # Movie Header Box tkhd_payload = u64.pack(creation_time) tkhd_payload += u64.pack(modification_time) tkhd_payload += u32.pack(track_id) # track id tkhd_payload += u32.pack(0) # reserved tkhd_payload += u64.pack(duration) tkhd_payload += u32.pack(0) * 2 # reserved tkhd_payload += s16.pack(0) # layer tkhd_payload += s16.pack(0) # alternate group tkhd_payload += s88.pack(1 if stream_type == 'audio' else 0) # volume tkhd_payload += u16.pack(0) # reserved tkhd_payload += unity_matrix tkhd_payload += u1616.pack(width) tkhd_payload += u1616.pack(height) trak_payload = full_box(b'tkhd', 1, TRACK_ENABLED | TRACK_IN_MOVIE | TRACK_IN_PREVIEW, tkhd_payload) # Track Header Box mdhd_payload = u64.pack(creation_time) mdhd_payload += u64.pack(modification_time) mdhd_payload += u32.pack(timescale) mdhd_payload += u64.pack(duration) mdhd_payload += u16.pack(((ord(language[0]) - 0x60) << 10) | ((ord(language[1]) - 0x60) << 5) | (ord(language[2]) - 0x60)) mdhd_payload += u16.pack(0) # pre defined mdia_payload = full_box(b'mdhd', 1, 0, mdhd_payload) # Media 
Header Box hdlr_payload = u32.pack(0) # pre defined if stream_type == 'audio': # handler type hdlr_payload += b'soun' hdlr_payload += u32.pack(0) * 3 # reserved hdlr_payload += b'SoundHandler\0' # name elif stream_type == 'video': hdlr_payload += b'vide' hdlr_payload += u32.pack(0) * 3 # reserved hdlr_payload += b'VideoHandler\0' # name elif stream_type == 'text': hdlr_payload += b'subt' hdlr_payload += u32.pack(0) * 3 # reserved hdlr_payload += b'SubtitleHandler\0' # name else: assert False mdia_payload += full_box(b'hdlr', 0, 0, hdlr_payload) # Handler Reference Box if stream_type == 'audio': smhd_payload = s88.pack(0) # balance smhd_payload += u16.pack(0) # reserved media_header_box = full_box(b'smhd', 0, 0, smhd_payload) # Sound Media Header elif stream_type == 'video': vmhd_payload = u16.pack(0) # graphics mode vmhd_payload += u16.pack(0) * 3 # opcolor media_header_box = full_box(b'vmhd', 0, 1, vmhd_payload) # Video Media Header elif stream_type == 'text': media_header_box = full_box(b'sthd', 0, 0, b'') # Subtitle Media Header else: assert False minf_payload = media_header_box dref_payload = u32.pack(1) # entry count dref_payload += full_box(b'url ', 0, SELF_CONTAINED, b'') # Data Entry URL Box dinf_payload = full_box(b'dref', 0, 0, dref_payload) # Data Reference Box minf_payload += box(b'dinf', dinf_payload) # Data Information Box stsd_payload = u32.pack(1) # entry count sample_entry_payload = u8.pack(0) * 6 # reserved sample_entry_payload += u16.pack(1) # data reference index if stream_type == 'audio': sample_entry_payload += u32.pack(0) * 2 # reserved sample_entry_payload += u16.pack(params.get('channels', 2)) sample_entry_payload += u16.pack(params.get('bits_per_sample', 16)) sample_entry_payload += u16.pack(0) # pre defined sample_entry_payload += u16.pack(0) # reserved sample_entry_payload += u1616.pack(params['sampling_rate']) if fourcc == 'AACL': sample_entry_box = box(b'mp4a', sample_entry_payload) if fourcc == 'EC-3': sample_entry_box = box(b'ec-3', sample_entry_payload) elif stream_type == 'video': sample_entry_payload += u16.pack(0) # pre defined sample_entry_payload += u16.pack(0) # reserved sample_entry_payload += u32.pack(0) * 3 # pre defined sample_entry_payload += u16.pack(width) sample_entry_payload += u16.pack(height) sample_entry_payload += u1616.pack(0x48) # horiz resolution 72 dpi sample_entry_payload += u1616.pack(0x48) # vert resolution 72 dpi sample_entry_payload += u32.pack(0) # reserved sample_entry_payload += u16.pack(1) # frame count sample_entry_payload += u8.pack(0) * 32 # compressor name sample_entry_payload += u16.pack(0x18) # depth sample_entry_payload += s16.pack(-1) # pre defined codec_private_data = binascii.unhexlify(params['codec_private_data'].encode()) if fourcc in ('H264', 'AVC1'): sps, pps = codec_private_data.split(u32.pack(1))[1:] avcc_payload = u8.pack(1) # configuration version avcc_payload += sps[1:4] # avc profile indication + profile compatibility + avc level indication avcc_payload += u8.pack(0xfc | (params.get('nal_unit_length_field', 4) - 1)) # complete representation (1) + reserved (11111) + length size minus one avcc_payload += u8.pack(1) # reserved (0) + number of sps (0000001) avcc_payload += u16.pack(len(sps)) avcc_payload += sps avcc_payload += u8.pack(1) # number of pps avcc_payload += u16.pack(len(pps)) avcc_payload += pps sample_entry_payload += box(b'avcC', avcc_payload) # AVC Decoder Configuration Record sample_entry_box = box(b'avc1', sample_entry_payload) # AVC Simple Entry else: assert False elif stream_type == 'text': 
if fourcc == 'TTML': sample_entry_payload += b'http://www.w3.org/ns/ttml\0' # namespace sample_entry_payload += b'\0' # schema location sample_entry_payload += b'\0' # auxiliary mime types(??) sample_entry_box = box(b'stpp', sample_entry_payload) else: assert False else: assert False stsd_payload += sample_entry_box stbl_payload = full_box(b'stsd', 0, 0, stsd_payload) # Sample Description Box stts_payload = u32.pack(0) # entry count stbl_payload += full_box(b'stts', 0, 0, stts_payload) # Decoding Time to Sample Box stsc_payload = u32.pack(0) # entry count stbl_payload += full_box(b'stsc', 0, 0, stsc_payload) # Sample To Chunk Box stco_payload = u32.pack(0) # entry count stbl_payload += full_box(b'stco', 0, 0, stco_payload) # Chunk Offset Box minf_payload += box(b'stbl', stbl_payload) # Sample Table Box mdia_payload += box(b'minf', minf_payload) # Media Information Box trak_payload += box(b'mdia', mdia_payload) # Media Box moov_payload += box(b'trak', trak_payload) # Track Box mehd_payload = u64.pack(duration) mvex_payload = full_box(b'mehd', 1, 0, mehd_payload) # Movie Extends Header Box trex_payload = u32.pack(track_id) # track id trex_payload += u32.pack(1) # default sample description index trex_payload += u32.pack(0) # default sample duration trex_payload += u32.pack(0) # default sample size trex_payload += u32.pack(0) # default sample flags mvex_payload += full_box(b'trex', 0, 0, trex_payload) # Track Extends Box moov_payload += box(b'mvex', mvex_payload) # Movie Extends Box stream.write(box(b'moov', moov_payload)) # Movie Box def extract_box_data(data, box_sequence): data_reader = io.BytesIO(data) while True: box_size = u32.unpack(data_reader.read(4))[0] box_type = data_reader.read(4) if box_type == box_sequence[0]: box_data = data_reader.read(box_size - 8) if len(box_sequence) == 1: return box_data return extract_box_data(box_data, box_sequence[1:]) data_reader.seek(box_size - 8, 1) class IsmFD(FragmentFD): """ Download segments in an ISM manifest """ def real_download(self, filename, info_dict): segments = info_dict['fragments'][:1] if self.params.get( 'test', False) else info_dict['fragments'] ctx = { 'filename': filename, 'total_frags': len(segments), } self._prepare_and_start_frag_download(ctx, info_dict) extra_state = ctx.setdefault('extra_state', { 'ism_track_written': False, }) skip_unavailable_fragments = self.params.get('skip_unavailable_fragments', True) frag_index = 0 for segment in segments: frag_index += 1 if frag_index <= ctx['fragment_index']: continue retry_manager = RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index, fatal=not skip_unavailable_fragments) for retry in retry_manager: try: success = self._download_fragment(ctx, segment['url'], info_dict) if not success: return False frag_content = self._read_fragment(ctx) if not extra_state['ism_track_written']: tfhd_data = extract_box_data(frag_content, [b'moof', b'traf', b'tfhd']) info_dict['_download_params']['track_id'] = u32.unpack(tfhd_data[4:8])[0] write_piff_header(ctx['dest_stream'], info_dict['_download_params']) extra_state['ism_track_written'] = True self._append_fragment(ctx, frag_content) except HTTPError as err: retry.error = err continue if retry_manager.error: if not skip_unavailable_fragments: return False self.report_skip_fragment(frag_index) return self._finish_frag_download(ctx, info_dict)
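The writer above emits everything in ISO BMFF box framing: a 4-byte big-endian size that counts the 8-byte header itself, a 4-byte type, then the payload. extract_box_data walks that framing recursively to dig a nested box out of a fragment, which is how IsmFD locates moof/traf/tfhd. A minimal self-contained sketch of the same framing and lookup; the helper names mirror the ones above, and unlike extract_box_data this sketch returns None rather than erroring when the box is absent:

import io
import struct

u32 = struct.Struct('>I')

def box(box_type, payload):
    # size field includes the 8-byte (size + type) header
    return u32.pack(8 + len(payload)) + box_type + payload

def find_box(data, box_sequence):
    # walk the size-prefixed boxes, descending into each matching type in turn
    reader = io.BytesIO(data)
    while True:
        header = reader.read(8)
        if len(header) < 8:
            return None  # box not present in this payload
        size, box_type = u32.unpack(header[:4])[0], header[4:]
        body = reader.read(size - 8)
        if box_type == box_sequence[0]:
            return body if len(box_sequence) == 1 else find_box(body, box_sequence[1:])

moof = box(b'moof', box(b'traf', box(b'tfhd', b'\x00' * 8)))
assert find_box(moof, [b'moof', b'traf', b'tfhd']) == b'\x00' * 8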
yt-dlp-2024.09.27/yt_dlp/downloader/mhtml.py000066400000000000000000000132351467563447100205030ustar00rootroot00000000000000import io import quopri import re import uuid from .fragment import FragmentFD from ..compat import imghdr from ..utils import escapeHTML, formatSeconds, srt_subtitles_timecode, urljoin from ..version import __version__ as YT_DLP_VERSION class MhtmlFD(FragmentFD): _STYLESHEET = '''\ html, body { margin: 0; padding: 0; height: 100vh; } html { overflow-y: scroll; scroll-snap-type: y mandatory; } body { scroll-snap-type: y mandatory; display: flex; flex-flow: column; } body > figure { max-width: 100vw; max-height: 100vh; scroll-snap-align: center; } body > figure > figcaption { text-align: center; height: 2.5em; } body > figure > img { display: block; margin: auto; max-width: 100%; max-height: calc(100vh - 5em); } ''' _STYLESHEET = re.sub(r'\s+', ' ', _STYLESHEET) _STYLESHEET = re.sub(r'\B \B|(?<=[\w\-]) (?=[^\w\-])|(?<=[^\w\-]) (?=[\w\-])', '', _STYLESHEET) @staticmethod def _escape_mime(s): return '=?utf-8?Q?' + (b''.join( bytes((b,)) if b >= 0x20 else b'=%02X' % b for b in quopri.encodestring(s.encode(), header=True) )).decode('us-ascii') + '?=' def _gen_cid(self, i, fragment, frag_boundary): return f'{i}.{frag_boundary}@yt-dlp.github.io.invalid' def _gen_stub(self, *, fragments, frag_boundary, title): output = io.StringIO() output.write( '<!DOCTYPE html>' '<html>' '<head>' f'<meta name="generator" content="yt-dlp {escapeHTML(YT_DLP_VERSION)}">' f'<title>{escapeHTML(title)}</title>' f'<style>{self._STYLESHEET}</style>' '<body>') t0 = 0 for i, frag in enumerate(fragments): output.write('<figure>') try: t1 = t0 + frag['duration'] output.write(( '<figcaption>Slide #{num}: {t0} – {t1} (duration: {duration})</figcaption>' ).format( num=i + 1, t0=srt_subtitles_timecode(t0), t1=srt_subtitles_timecode(t1), duration=formatSeconds(frag['duration'], msec=True), )) except (KeyError, ValueError, TypeError): t1 = None output.write(f'<figcaption>Slide #{i + 1}</figcaption>') output.write(f'<img src="cid:{self._gen_cid(i, frag, frag_boundary)}">') output.write('</figure>') t0 = t1 return output.getvalue() def real_download(self, filename, info_dict): fragment_base_url = info_dict.get('fragment_base_url') fragments = info_dict['fragments'][:1] if self.params.get( 'test', False) else info_dict['fragments'] title = info_dict.get('title', info_dict['format_id']) origin = info_dict.get('webpage_url', info_dict['url']) ctx = { 'filename': filename, 'total_frags': len(fragments), } self._prepare_and_start_frag_download(ctx, info_dict) extra_state = ctx.setdefault('extra_state', { 'header_written': False, 'mime_boundary': str(uuid.uuid4()).replace('-', ''), }) frag_boundary = extra_state['mime_boundary'] if not extra_state['header_written']: stub = self._gen_stub( fragments=fragments, frag_boundary=frag_boundary, title=title, ) ctx['dest_stream'].write(( 'MIME-Version: 1.0\r\n' 'From: <nowhere@yt-dlp.github.io.invalid>\r\n' 'To: <nowhere@yt-dlp.github.io.invalid>\r\n' f'Subject: {self._escape_mime(title)}\r\n' 'Content-type: multipart/related; ' f'boundary="{frag_boundary}"; ' 'type="text/html"\r\n' f'X.yt-dlp.Origin: {origin}\r\n' '\r\n' f'--{frag_boundary}\r\n' 'Content-Type: text/html; charset=utf-8\r\n' f'Content-Length: {len(stub)}\r\n' '\r\n' f'{stub}\r\n').encode()) extra_state['header_written'] = True for i, fragment in enumerate(fragments): if (i + 1) <= ctx['fragment_index']: continue fragment_url = fragment.get('url') if not fragment_url: assert fragment_base_url fragment_url = urljoin(fragment_base_url, fragment['path']) success = self._download_fragment(ctx, fragment_url, info_dict) if not success: continue frag_content = self._read_fragment(ctx) frag_header = io.BytesIO() frag_header.write( b'--%b\r\n' % frag_boundary.encode('us-ascii')) frag_header.write( b'Content-ID: <%b>\r\n' % self._gen_cid(i, fragment, frag_boundary).encode('us-ascii')) frag_header.write( b'Content-type: %b\r\n' % f'image/{imghdr.what(h=frag_content) or "jpeg"}'.encode()) frag_header.write( b'Content-length: %u\r\n' % len(frag_content)) frag_header.write( b'Content-location: %b\r\n' % fragment_url.encode('us-ascii')) frag_header.write( b'X.yt-dlp.Duration: %f\r\n' % fragment['duration']) frag_header.write(b'\r\n') self._append_fragment( ctx, frag_header.getvalue() + frag_content + b'\r\n') ctx['dest_stream'].write( b'--%b--\r\n\r\n' % frag_boundary.encode('us-ascii')) return self._finish_frag_download(ctx, info_dict)
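The output is therefore a standard MIME multipart/related document: a text/html root stub that references each slide through a cid: URL, followed by one image part per fragment carrying the matching Content-ID. Because the container is plain MIME, the structure can be inspected with the standard library; a small sketch, where slides.mht stands in for a hypothetical file produced by this downloader:

import email

# slides.mht is a hypothetical MhtmlFD output file
with open('slides.mht', 'rb') as f:
    msg = email.message_from_binary_file(f)

for part in msg.walk():
    # expected: multipart/related, then text/html, then one image/* part per slide
    print(part.get_content_type(), part.get('Content-ID'))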
yt-dlp-2024.09.27/yt_dlp/downloader/niconico.py000066400000000000000000000124141467563447100211610ustar00rootroot00000000000000import json import threading import time from . import get_suitable_downloader from .common import FileDownloader from .external import FFmpegFD from ..networking import Request from ..utils import DownloadError, str_or_none, try_get class NiconicoDmcFD(FileDownloader): """ Downloading niconico douga from DMC with heartbeat """ def real_download(self, filename, info_dict): from ..extractor.niconico import NiconicoIE self.to_screen(f'[{self.FD_NAME}] Downloading from DMC') ie = NiconicoIE(self.ydl) info_dict, heartbeat_info_dict = ie._get_heartbeat_info(info_dict) fd = get_suitable_downloader(info_dict, params=self.params)(self.ydl, self.params) success = download_complete = False timer = [None] heartbeat_lock = threading.Lock() heartbeat_url = heartbeat_info_dict['url'] heartbeat_data = heartbeat_info_dict['data'].encode() heartbeat_interval = heartbeat_info_dict.get('interval', 30) request = Request(heartbeat_url, heartbeat_data) def heartbeat(): try: self.ydl.urlopen(request).read() except Exception: self.to_screen(f'[{self.FD_NAME}] Heartbeat failed') with heartbeat_lock: if not download_complete: timer[0] = threading.Timer(heartbeat_interval, heartbeat) timer[0].start() heartbeat_info_dict['ping']() self.to_screen('[%s] Heartbeat with %d second interval ...' % (self.FD_NAME, heartbeat_interval)) try: heartbeat() if type(fd).__name__ == 'HlsFD': info_dict.update(ie._extract_m3u8_formats(info_dict['url'], info_dict['id'])[0]) success = fd.real_download(filename, info_dict) finally: if heartbeat_lock: with heartbeat_lock: timer[0].cancel() download_complete = True return success
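NiconicoDmcFD keeps the DMC session alive by re-arming a threading.Timer from inside the heartbeat callback itself, so the ping repeats every interval seconds until the download finishes and the pending timer is cancelled. A stripped-down sketch of that self-rescheduling pattern; post_ping stands in for the keep-alive HTTP request, and all names here are illustrative, not part of yt-dlp:

import threading

def make_heartbeat(post_ping, interval):
    lock = threading.Lock()
    state = {'timer': None, 'done': False}

    def beat():
        post_ping()  # e.g. an HTTP POST that keeps the session alive
        with lock:
            if not state['done']:  # re-arm until the download finishes
                state['timer'] = threading.Timer(interval, beat)
                state['timer'].start()

    def stop():
        with lock:
            state['done'] = True
            if state['timer'] is not None:
                state['timer'].cancel()

    return beat, stop

beat, stop = make_heartbeat(lambda: print('ping'), 30)
beat()  # first ping now, then one every 30 seconds on a timer thread
stop()  # cancel the pending timer once the download completes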
class NiconicoLiveFD(FileDownloader): """ Downloads niconico live streams without being stopped """ def real_download(self, filename, info_dict): video_id = info_dict['video_id'] ws_url = info_dict['url'] ws_extractor = info_dict['ws'] ws_origin_host = info_dict['origin'] live_quality = info_dict.get('live_quality', 'high') live_latency = info_dict.get('live_latency', 'high') dl = FFmpegFD(self.ydl, self.params or {}) new_info_dict = info_dict.copy() new_info_dict.update({ 'protocol': 'm3u8', }) def communicate_ws(reconnect): if reconnect: ws = self.ydl.urlopen(Request(ws_url, headers={'Origin': f'https://{ws_origin_host}'})) if self.ydl.params.get('verbose', False): self.to_screen('[debug] Sending startWatching request') ws.send(json.dumps({ 'type': 'startWatching', 'data': { 'stream': { 'quality': live_quality, 'protocol': 'hls+fmp4', 'latency': live_latency, 'chasePlay': False, }, 'room': { 'protocol': 'webSocket', 'commentable': True, }, 'reconnect': True, }, })) else: ws = ws_extractor with ws: while True: recv = ws.recv() if not recv: continue data = json.loads(recv) if not data or not isinstance(data, dict): continue if data.get('type') == 'ping': # pong back ws.send(r'{"type":"pong"}') ws.send(r'{"type":"keepSeat"}') elif data.get('type') == 'disconnect': self.write_debug(data) return True elif data.get('type') == 'error': self.write_debug(data) message = try_get(data, lambda x: x['body']['code'], str) or recv raise DownloadError(message) elif self.ydl.params.get('verbose', False): if len(recv) > 100: recv = recv[:100] + '...' self.to_screen(f'[debug] Server said: {recv}') def ws_main(): reconnect = False while True: try: ret = communicate_ws(reconnect) if ret is True: return except BaseException as e: self.to_screen('[{}] {}: Connection error occurred, reconnecting after 10 seconds: {}'.format('niconico:live', video_id, str_or_none(e))) time.sleep(10) continue finally: reconnect = True thread = threading.Thread(target=ws_main, daemon=True) thread.start() return dl.download(filename, new_info_dict)
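RtmpFD below drives the external rtmpdump binary and scrapes progress from its stderr with a regular expression. A quick illustration of what that regex yields on a typical progress line; the sample line is fabricated:

import re

line = '12345.678 kB / 123.45 sec (67.8%)'  # hypothetical rtmpdump stderr line
mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
if mobj:
    downloaded_bytes = int(float(mobj.group(1)) * 1024)     # kB -> bytes
    percent = float(mobj.group(2))                          # 67.8
    total_estimate = int(downloaded_bytes * 100 / percent)  # extrapolated size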
yt-dlp-2024.09.27/yt_dlp/downloader/rtmp.py000066400000000000000000000213271467563447100203450ustar00rootroot00000000000000import os import re import subprocess import time from .common import FileDownloader from ..utils import ( Popen, check_executable, encodeArgument, encodeFilename, get_exe_version, ) def rtmpdump_version(): return get_exe_version( 'rtmpdump', ['--help'], r'(?i)RTMPDump\s*v?([0-9a-zA-Z._-]+)') class RtmpFD(FileDownloader): def real_download(self, filename, info_dict): def run_rtmpdump(args): start = time.time() resume_percent = None resume_downloaded_data_len = None proc = Popen(args, stderr=subprocess.PIPE) cursor_in_new_line = True proc_stderr_closed = False try: while not proc_stderr_closed: # read line from stderr line = '' while True: char = proc.stderr.read(1) if not char: proc_stderr_closed = True break if char in [b'\r', b'\n']: break line += char.decode('ascii', 'replace') if not line: # proc_stderr_closed is True continue mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line) if mobj: downloaded_data_len = int(float(mobj.group(1)) * 1024) percent = float(mobj.group(2)) if not resume_percent: resume_percent = percent resume_downloaded_data_len = downloaded_data_len time_now = time.time() eta = self.calc_eta(start, time_now, 100 - resume_percent, percent - resume_percent) speed = self.calc_speed(start, time_now, downloaded_data_len - resume_downloaded_data_len) data_len = None if percent > 0: data_len = int(downloaded_data_len * 100 / percent) self._hook_progress({ 'status': 'downloading', 'downloaded_bytes': downloaded_data_len, 'total_bytes_estimate': data_len, 'tmpfilename': tmpfilename, 'filename': filename, 'eta': eta, 'elapsed': time_now - start, 'speed': speed, }, info_dict) cursor_in_new_line = False else: # no percent for live streams mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line) if mobj: downloaded_data_len = int(float(mobj.group(1)) * 1024) time_now = time.time() speed = self.calc_speed(start, time_now, downloaded_data_len) self._hook_progress({ 'downloaded_bytes': downloaded_data_len, 'tmpfilename': tmpfilename, 'filename': filename, 'status': 'downloading', 'elapsed': time_now - start, 'speed': speed, }, info_dict) cursor_in_new_line = False elif self.params.get('verbose', False): if not cursor_in_new_line: self.to_screen('') cursor_in_new_line = True self.to_screen('[rtmpdump] ' + line) if not cursor_in_new_line: self.to_screen('') return proc.wait() except BaseException: # Including KeyboardInterrupt proc.kill(timeout=None) raise url = info_dict['url'] player_url = info_dict.get('player_url') page_url = info_dict.get('page_url') app = info_dict.get('app') play_path = info_dict.get('play_path') tc_url = info_dict.get('tc_url') flash_version = info_dict.get('flash_version') live = info_dict.get('rtmp_live', False) conn = info_dict.get('rtmp_conn') protocol = info_dict.get('rtmp_protocol') real_time = info_dict.get('rtmp_real_time', False) no_resume = info_dict.get('no_resume', False) continue_dl = self.params.get('continuedl', True) self.report_destination(filename) tmpfilename = self.temp_name(filename) test = self.params.get('test', False) # Check for rtmpdump first if not check_executable('rtmpdump', ['-h']): self.report_error('RTMP download detected but "rtmpdump" could not be run. Please install it') return False # Download using rtmpdump. rtmpdump returns exit code 2 when # the connection was interrupted and resuming appears to be # possible. This is part of rtmpdump's normal usage, AFAIK. basic_args = [ 'rtmpdump', '--verbose', '-r', url, '-o', tmpfilename] if player_url is not None: basic_args += ['--swfVfy', player_url] if page_url is not None: basic_args += ['--pageUrl', page_url] if app is not None: basic_args += ['--app', app] if play_path is not None: basic_args += ['--playpath', play_path] if tc_url is not None: basic_args += ['--tcUrl', tc_url] if test: basic_args += ['--stop', '1'] if flash_version is not None: basic_args += ['--flashVer', flash_version] if live: basic_args += ['--live'] if isinstance(conn, list): for entry in conn: basic_args += ['--conn', entry] elif isinstance(conn, str): basic_args += ['--conn', conn] if protocol is not None: basic_args += ['--protocol', protocol] if real_time: basic_args += ['--realtime'] args = basic_args if not no_resume and continue_dl and not live: args += ['--resume'] if not live and continue_dl: args += ['--skip', '1'] args = [encodeArgument(a) for a in args] self._debug_cmd(args, exe='rtmpdump') RD_SUCCESS = 0 RD_FAILED = 1 RD_INCOMPLETE = 2 RD_NO_CONNECT = 3 started = time.time() try: retval = run_rtmpdump(args) except KeyboardInterrupt: if not info_dict.get('is_live'): raise retval = RD_SUCCESS self.to_screen('\n[rtmpdump] Interrupted by user') if retval == RD_NO_CONNECT: self.report_error('[rtmpdump] Could not connect to RTMP server.') return False while retval in (RD_INCOMPLETE, RD_FAILED) and not test and not live: prevsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen(f'[rtmpdump] Downloaded {prevsize} bytes') time.sleep(5.0) # This seems to be needed args = [*basic_args, '--resume'] if retval == RD_FAILED: args += ['--skip', '1'] args = [encodeArgument(a) for a in args] retval = run_rtmpdump(args) cursize = os.path.getsize(encodeFilename(tmpfilename)) if prevsize == cursize and retval == RD_FAILED: break # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those if prevsize == cursize and retval == RD_INCOMPLETE and cursize > 1024: self.to_screen('[rtmpdump] Could not download the whole video.
This can happen for some advertisements.') retval = RD_SUCCESS break if retval == RD_SUCCESS or (test and retval == RD_INCOMPLETE): fsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen(f'[rtmpdump] Downloaded {fsize} bytes') self.try_rename(tmpfilename, filename) self._hook_progress({ 'downloaded_bytes': fsize, 'total_bytes': fsize, 'filename': filename, 'status': 'finished', 'elapsed': time.time() - started, }, info_dict) return True else: self.to_stderr('\n') self.report_error('rtmpdump exited with code %d' % retval) return False yt-dlp-2024.09.27/yt_dlp/downloader/rtsp.py000066400000000000000000000027451467563447100203560ustar00rootroot00000000000000import os import subprocess from .common import FileDownloader from ..utils import check_executable, encodeFilename class RtspFD(FileDownloader): def real_download(self, filename, info_dict): url = info_dict['url'] self.report_destination(filename) tmpfilename = self.temp_name(filename) if check_executable('mplayer', ['-h']): args = [ 'mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url] elif check_executable('mpv', ['-h']): args = [ 'mpv', '-really-quiet', '--vo=null', '--stream-dump=' + tmpfilename, url] else: self.report_error('MMS or RTSP download detected but neither "mplayer" nor "mpv" could be run. Please install one') return False self._debug_cmd(args) retval = subprocess.call(args) if retval == 0: fsize = os.path.getsize(encodeFilename(tmpfilename)) self.to_screen(f'\r[{args[0]}] {fsize} bytes') self.try_rename(tmpfilename, filename) self._hook_progress({ 'downloaded_bytes': fsize, 'total_bytes': fsize, 'filename': filename, 'status': 'finished', }, info_dict) return True else: self.to_stderr('\n') self.report_error('%s exited with code %d' % (args[0], retval)) return False yt-dlp-2024.09.27/yt_dlp/downloader/websocket.py000066400000000000000000000033541467563447100213510ustar00rootroot00000000000000import asyncio import contextlib import os import signal import threading from .common import FileDownloader from .external import FFmpegFD from ..dependencies import websockets class FFmpegSinkFD(FileDownloader): """ A sink to ffmpeg for downloading fragments in any form """ def real_download(self, filename, info_dict): info_copy = info_dict.copy() info_copy['url'] = '-' async def call_conn(proc, stdin): try: await self.real_connection(stdin, info_dict) except OSError: pass finally: with contextlib.suppress(OSError): stdin.flush() stdin.close() os.kill(os.getpid(), signal.SIGINT) class FFmpegStdinFD(FFmpegFD): @classmethod def get_basename(cls): return FFmpegFD.get_basename() def on_process_started(self, proc, stdin): thread = threading.Thread(target=asyncio.run, daemon=True, args=(call_conn(proc, stdin), )) thread.start() return FFmpegStdinFD(self.ydl, self.params or {}).download(filename, info_copy) async def real_connection(self, sink, info_dict): """ Override this in subclasses """ raise NotImplementedError('This method must be implemented by subclasses') class WebSocketFragmentFD(FFmpegSinkFD): async def real_connection(self, sink, info_dict): async with websockets.connect(info_dict['url'], extra_headers=info_dict.get('http_headers', {})) as ws: while True: recv = await ws.recv() if isinstance(recv, str): recv = recv.encode('utf8') sink.write(recv) yt-dlp-2024.09.27/yt_dlp/downloader/youtube_live_chat.py000066400000000000000000000251671467563447100231030ustar00rootroot00000000000000import json import time from .fragment import FragmentFD from ..networking.exceptions 
import HTTPError from ..utils import ( RegexNotFoundError, RetryManager, dict_get, int_or_none, try_get, ) from ..utils.networking import HTTPHeaderDict class YoutubeLiveChatFD(FragmentFD): """ Downloads YouTube live chats fragment by fragment """ def real_download(self, filename, info_dict): video_id = info_dict['video_id'] self.to_screen(f'[{self.FD_NAME}] Downloading live chat') if not self.params.get('skip_download') and info_dict['protocol'] == 'youtube_live_chat': self.report_warning('Live chat download runs until the livestream ends. ' 'If you wish to download the video simultaneously, run a separate yt-dlp instance') test = self.params.get('test', False) ctx = { 'filename': filename, 'live': True, 'total_frags': None, } from ..extractor.youtube import YoutubeBaseInfoExtractor ie = YoutubeBaseInfoExtractor(self.ydl) start_time = int(time.time() * 1000) def dl_fragment(url, data=None, headers=None): http_headers = HTTPHeaderDict(info_dict.get('http_headers'), headers) return self._download_fragment(ctx, url, info_dict, http_headers, data) def parse_actions_replay(live_chat_continuation): offset = continuation_id = click_tracking_params = None processed_fragment = bytearray() for action in live_chat_continuation.get('actions', []): if 'replayChatItemAction' in action: replay_chat_item_action = action['replayChatItemAction'] offset = int(replay_chat_item_action['videoOffsetTimeMsec']) processed_fragment.extend( json.dumps(action, ensure_ascii=False).encode() + b'\n') if offset is not None: continuation = try_get( live_chat_continuation, lambda x: x['continuations'][0]['liveChatReplayContinuationData'], dict) if continuation: continuation_id = continuation.get('continuation') click_tracking_params = continuation.get('clickTrackingParams') self._append_fragment(ctx, processed_fragment) return continuation_id, offset, click_tracking_params def try_refresh_replay_beginning(live_chat_continuation): # choose the second option that contains the unfiltered live chat replay refresh_continuation = try_get( live_chat_continuation, lambda x: x['header']['liveChatHeaderRenderer']['viewSelector']['sortFilterSubMenuRenderer']['subMenuItems'][1]['continuation']['reloadContinuationData'], dict) if refresh_continuation: # no data yet but required to call _append_fragment self._append_fragment(ctx, b'') refresh_continuation_id = refresh_continuation.get('continuation') offset = 0 click_tracking_params = refresh_continuation.get('trackingParams') return refresh_continuation_id, offset, click_tracking_params return parse_actions_replay(live_chat_continuation) live_offset = 0 def parse_actions_live(live_chat_continuation): nonlocal live_offset continuation_id = click_tracking_params = None processed_fragment = bytearray() for action in live_chat_continuation.get('actions', []): timestamp = self.parse_live_timestamp(action) if timestamp is not None: live_offset = timestamp - start_time # compatibility with replay format pseudo_action = { 'replayChatItemAction': {'actions': [action]}, 'videoOffsetTimeMsec': str(live_offset), 'isLive': True, } processed_fragment.extend( json.dumps(pseudo_action, ensure_ascii=False).encode() + b'\n') continuation_data_getters = [ lambda x: x['continuations'][0]['invalidationContinuationData'], lambda x: x['continuations'][0]['timedContinuationData'], ] continuation_data = try_get(live_chat_continuation, continuation_data_getters, dict) if continuation_data: continuation_id = continuation_data.get('continuation') click_tracking_params = 
continuation_data.get('clickTrackingParams') timeout_ms = int_or_none(continuation_data.get('timeoutMs')) if timeout_ms is not None: time.sleep(timeout_ms / 1000) self._append_fragment(ctx, processed_fragment) return continuation_id, live_offset, click_tracking_params def download_and_parse_fragment(url, frag_index, request_data=None, headers=None): for retry in RetryManager(self.params.get('fragment_retries'), self.report_retry, frag_index=frag_index): try: success = dl_fragment(url, request_data, headers) if not success: return False, None, None, None raw_fragment = self._read_fragment(ctx) try: data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) except RegexNotFoundError: data = None if not data: data = json.loads(raw_fragment) live_chat_continuation = try_get( data, lambda x: x['continuationContents']['liveChatContinuation'], dict) or {} func = (info_dict['protocol'] == 'youtube_live_chat' and parse_actions_live or frag_index == 1 and try_refresh_replay_beginning or parse_actions_replay) return (True, *func(live_chat_continuation)) except HTTPError as err: retry.error = err continue return False, None, None, None self._prepare_and_start_frag_download(ctx, info_dict) success = dl_fragment(info_dict['url']) if not success: return False raw_fragment = self._read_fragment(ctx) try: data = ie.extract_yt_initial_data(video_id, raw_fragment.decode('utf-8', 'replace')) except RegexNotFoundError: return False continuation_id = try_get( data, lambda x: x['contents']['twoColumnWatchNextResults']['conversationBar']['liveChatRenderer']['continuations'][0]['reloadContinuationData']['continuation']) # no data yet but required to call _append_fragment self._append_fragment(ctx, b'') ytcfg = ie.extract_ytcfg(video_id, raw_fragment.decode('utf-8', 'replace')) if not ytcfg: return False api_key = try_get(ytcfg, lambda x: x['INNERTUBE_API_KEY']) innertube_context = try_get(ytcfg, lambda x: x['INNERTUBE_CONTEXT']) if not api_key or not innertube_context: return False visitor_data = try_get(innertube_context, lambda x: x['client']['visitorData'], str) if info_dict['protocol'] == 'youtube_live_chat_replay': url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat_replay?key=' + api_key chat_page_url = 'https://www.youtube.com/live_chat_replay?continuation=' + continuation_id elif info_dict['protocol'] == 'youtube_live_chat': url = 'https://www.youtube.com/youtubei/v1/live_chat/get_live_chat?key=' + api_key chat_page_url = 'https://www.youtube.com/live_chat?continuation=' + continuation_id frag_index = offset = 0 click_tracking_params = None while continuation_id is not None: frag_index += 1 request_data = { 'context': innertube_context, 'continuation': continuation_id, } if frag_index > 1: request_data['currentPlayerState'] = {'playerOffsetMs': str(max(offset - 5000, 0))} if click_tracking_params: request_data['context']['clickTracking'] = {'clickTrackingParams': click_tracking_params} headers = ie.generate_api_headers(ytcfg=ytcfg, visitor_data=visitor_data) headers.update({'content-type': 'application/json'}) fragment_request_data = json.dumps(request_data, ensure_ascii=False).encode() + b'\n' success, continuation_id, offset, click_tracking_params = download_and_parse_fragment( url, frag_index, fragment_request_data, headers) else: success, continuation_id, offset, click_tracking_params = download_and_parse_fragment( chat_page_url, frag_index) if not success: return False if test: break return self._finish_frag_download(ctx, info_dict) @staticmethod def 
parse_live_timestamp(action): action_content = dict_get( action, ['addChatItemAction', 'addLiveChatTickerItemAction', 'addBannerToLiveChatCommand']) if not isinstance(action_content, dict): return None item = dict_get(action_content, ['item', 'bannerRenderer']) if not isinstance(item, dict): return None renderer = dict_get(item, [ # text 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer', 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer', # ticker 'liveChatTickerPaidMessageItemRenderer', 'liveChatTickerSponsorItemRenderer', # banner 'liveChatBannerRenderer', ]) if not isinstance(renderer, dict): return None parent_item_getters = [ lambda x: x['showItemEndpoint']['showLiveChatItemEndpoint']['renderer'], lambda x: x['contents'], ] parent_item = try_get(renderer, parent_item_getters, dict) if parent_item: renderer = dict_get(parent_item, [ 'liveChatTextMessageRenderer', 'liveChatPaidMessageRenderer', 'liveChatMembershipItemRenderer', 'liveChatPaidStickerRenderer', ]) if not isinstance(renderer, dict): return None return int_or_none(renderer.get('timestampUsec'), 1000) yt-dlp-2024.09.27/yt_dlp/extractor/000077500000000000000000000000001467563447100166615ustar00rootroot00000000000000yt-dlp-2024.09.27/yt_dlp/extractor/__init__.py000066400000000000000000000025111467563447100207710ustar00rootroot00000000000000from ..compat.compat_utils import passthrough_module passthrough_module(__name__, '.extractors') del passthrough_module def gen_extractor_classes(): """ Return a list of supported extractors. The order does matter; the first extractor matched is the one handling the URL. """ from .extractors import _ALL_CLASSES return _ALL_CLASSES def gen_extractors(): """ Return a list of an instance of every supported extractor. The order does matter; the first extractor matched is the one handling the URL. """ return [klass() for klass in gen_extractor_classes()] def list_extractor_classes(age_limit=None): """Return a list of extractors that are suitable for the given age, sorted by extractor name""" from .generic import GenericIE yield from sorted(filter( lambda ie: ie.is_suitable(age_limit) and ie != GenericIE, gen_extractor_classes()), key=lambda ie: ie.IE_NAME.lower()) yield GenericIE def list_extractors(age_limit=None): """Return a list of extractor instances that are suitable for the given age, sorted by extractor name""" return [ie() for ie in list_extractor_classes(age_limit)] def get_info_extractor(ie_name): """Returns the info extractor class with the given ie_name""" from . 
import extractors return getattr(extractors, f'{ie_name}IE') yt-dlp-2024.09.27/yt_dlp/extractor/_extractors.py000066400000000000000000001514051467563447100215760ustar00rootroot00000000000000# flake8: noqa: F401 # isort: off from .youtube import ( # Youtube is moved to the top to improve performance YoutubeIE, YoutubeClipIE, YoutubeFavouritesIE, YoutubeNotificationsIE, YoutubeHistoryIE, YoutubeTabIE, YoutubeLivestreamEmbedIE, YoutubePlaylistIE, YoutubeRecommendedIE, YoutubeSearchDateIE, YoutubeSearchIE, YoutubeSearchURLIE, YoutubeMusicSearchURLIE, YoutubeSubscriptionsIE, YoutubeTruncatedIDIE, YoutubeTruncatedURLIE, YoutubeYtBeIE, YoutubeYtUserIE, YoutubeWatchLaterIE, YoutubeShortsAudioPivotIE, YoutubeConsentRedirectIE, ) # isort: on from .abc import ( ABCIE, ABCIViewIE, ABCIViewShowSeriesIE, ) from .abcnews import ( AbcNewsIE, AbcNewsVideoIE, ) from .abcotvs import ( ABCOTVSIE, ABCOTVSClipsIE, ) from .abematv import ( AbemaTVIE, AbemaTVTitleIE, ) from .academicearth import AcademicEarthCourseIE from .acast import ( ACastChannelIE, ACastIE, ) from .acfun import ( AcFunBangumiIE, AcFunVideoIE, ) from .adn import ( ADNIE, ADNSeasonIE, ) from .adobeconnect import AdobeConnectIE from .adobetv import ( AdobeTVChannelIE, AdobeTVEmbedIE, AdobeTVIE, AdobeTVShowIE, AdobeTVVideoIE, ) from .adultswim import AdultSwimIE from .aenetworks import ( AENetworksCollectionIE, AENetworksIE, AENetworksShowIE, BiographyIE, HistoryPlayerIE, HistoryTopicIE, ) from .aeonco import AeonCoIE from .afreecatv import ( AfreecaTVCatchStoryIE, AfreecaTVIE, AfreecaTVLiveIE, AfreecaTVUserIE, ) from .agora import ( TokFMAuditionIE, TokFMPodcastIE, WyborczaPodcastIE, WyborczaVideoIE, ) from .airtv import AirTVIE from .aitube import AitubeKZVideoIE from .aliexpress import AliExpressLiveIE from .aljazeera import AlJazeeraIE from .allocine import AllocineIE from .allstar import ( AllstarIE, AllstarProfileIE, ) from .alphaporno import AlphaPornoIE from .alsace20tv import ( Alsace20TVEmbedIE, Alsace20TVIE, ) from .altcensored import ( AltCensoredChannelIE, AltCensoredIE, ) from .alura import ( AluraCourseIE, AluraIE, ) from .amadeustv import AmadeusTVIE from .amara import AmaraIE from .amazon import ( AmazonReviewsIE, AmazonStoreIE, ) from .amazonminitv import ( AmazonMiniTVIE, AmazonMiniTVSeasonIE, AmazonMiniTVSeriesIE, ) from .amcnetworks import AMCNetworksIE from .americastestkitchen import ( AmericasTestKitchenIE, AmericasTestKitchenSeasonIE, ) from .anchorfm import AnchorFMEpisodeIE from .angel import AngelIE from .antenna import ( Ant1NewsGrArticleIE, Ant1NewsGrEmbedIE, AntennaGrWatchIE, ) from .anvato import AnvatoIE from .aol import AolIE from .apa import APAIE from .aparat import AparatIE from .appleconnect import AppleConnectIE from .applepodcasts import ApplePodcastsIE from .appletrailers import ( AppleTrailersIE, AppleTrailersSectionIE, ) from .archiveorg import ( ArchiveOrgIE, YoutubeWebArchiveIE, ) from .arcpublishing import ArcPublishingIE from .ard import ( ARDIE, ARDBetaMediathekIE, ARDMediathekCollectionIE, ) from .arkena import ArkenaIE from .arnes import ArnesIE from .art19 import ( Art19IE, Art19ShowIE, ) from .arte import ( ArteTVCategoryIE, ArteTVEmbedIE, ArteTVIE, ArteTVPlaylistIE, ) from .asobichannel import ( AsobiChannelIE, AsobiChannelTagURLIE, ) from .asobistage import AsobiStageIE from .atresplayer import AtresPlayerIE from .atscaleconf import AtScaleConfEventIE from .atvat import ATVAtIE from .audimedia import AudiMediaIE from .audioboom import AudioBoomIE from .audiodraft import ( AudiodraftCustomIE, 
AudiodraftGenericIE, ) from .audiomack import ( AudiomackAlbumIE, AudiomackIE, ) from .audius import ( AudiusIE, AudiusPlaylistIE, AudiusProfileIE, AudiusTrackIE, ) from .awaan import ( AWAANIE, AWAANLiveIE, AWAANSeasonIE, AWAANVideoIE, ) from .axs import AxsIE from .azmedien import AZMedienIE from .baidu import BaiduVideoIE from .banbye import ( BanByeChannelIE, BanByeIE, ) from .bandaichannel import BandaiChannelIE from .bandcamp import ( BandcampAlbumIE, BandcampIE, BandcampUserIE, BandcampWeeklyIE, ) from .bannedvideo import BannedVideoIE from .bbc import ( BBCIE, BBCCoUkArticleIE, BBCCoUkIE, BBCCoUkIPlayerEpisodesIE, BBCCoUkIPlayerGroupIE, BBCCoUkPlaylistIE, ) from .beacon import BeaconTvIE from .beatbump import ( BeatBumpPlaylistIE, BeatBumpVideoIE, ) from .beatport import BeatportIE from .beeg import BeegIE from .behindkink import BehindKinkIE from .bellmedia import BellMediaIE from .berufetv import BerufeTVIE from .bet import BetIE from .bfi import BFIPlayerIE from .bfmtv import ( BFMTVIE, BFMTVArticleIE, BFMTVLiveIE, ) from .bibeltv import ( BibelTVLiveIE, BibelTVSeriesIE, BibelTVVideoIE, ) from .bigflix import BigflixIE from .bigo import BigoIE from .bild import BildIE from .bilibili import ( BilibiliAudioAlbumIE, BilibiliAudioIE, BiliBiliBangumiIE, BiliBiliBangumiMediaIE, BiliBiliBangumiSeasonIE, BilibiliCategoryIE, BilibiliCheeseIE, BilibiliCheeseSeasonIE, BilibiliCollectionListIE, BilibiliFavoritesListIE, BiliBiliIE, BiliBiliPlayerIE, BilibiliPlaylistIE, BiliBiliSearchIE, BilibiliSeriesListIE, BilibiliSpaceAudioIE, BilibiliSpaceVideoIE, BilibiliWatchlaterIE, BiliIntlIE, BiliIntlSeriesIE, BiliLiveIE, ) from .biobiochiletv import BioBioChileTVIE from .bitchute import ( BitChuteChannelIE, BitChuteIE, ) from .blackboardcollaborate import BlackboardCollaborateIE from .bleacherreport import ( BleacherReportCMSIE, BleacherReportIE, ) from .blerp import BlerpIE from .blogger import BloggerIE from .bloomberg import BloombergIE from .bokecc import BokeCCIE from .bongacams import BongaCamsIE from .boosty import BoostyIE from .bostonglobe import BostonGlobeIE from .box import BoxIE from .boxcast import BoxCastVideoIE from .bpb import BpbIE from .br import BRIE from .brainpop import ( BrainPOPELLIE, BrainPOPEspIE, BrainPOPFrIE, BrainPOPIE, BrainPOPIlIE, BrainPOPJrIE, ) from .bravotv import BravoTVIE from .breitbart import BreitBartIE from .brightcove import ( BrightcoveLegacyIE, BrightcoveNewIE, ) from .brilliantpala import ( BrilliantpalaClassesIE, BrilliantpalaElearnIE, ) from .bundesliga import BundesligaIE from .bundestag import BundestagIE from .businessinsider import BusinessInsiderIE from .buzzfeed import BuzzFeedIE from .byutv import BYUtvIE from .c56 import C56IE from .caffeinetv import CaffeineTVIE from .callin import CallinIE from .caltrans import CaltransIE from .cam4 import CAM4IE from .camdemy import ( CamdemyFolderIE, CamdemyIE, ) from .camfm import ( CamFMEpisodeIE, CamFMShowIE, ) from .cammodels import CamModelsIE from .camsoda import CamsodaIE from .camtasia import CamtasiaEmbedIE from .canal1 import Canal1IE from .canalalpha import CanalAlphaIE from .canalc2 import Canalc2IE from .canalplus import CanalplusIE from .caracoltv import CaracolTvPlayIE from .cartoonnetwork import CartoonNetworkIE from .cbc import ( CBCIE, CBCGemIE, CBCGemLiveIE, CBCGemPlaylistIE, CBCPlayerIE, CBCPlayerPlaylistIE, ) from .cbs import ( CBSIE, ParamountPressExpressIE, ) from .cbsnews import ( CBSLocalArticleIE, CBSLocalIE, CBSLocalLiveIE, CBSNewsEmbedIE, CBSNewsIE, CBSNewsLiveIE, 
CBSNewsLiveVideoIE, ) from .cbssports import ( CBSSportsEmbedIE, CBSSportsIE, TwentyFourSevenSportsIE, ) from .ccc import ( CCCIE, CCCPlaylistIE, ) from .ccma import CCMAIE from .cctv import CCTVIE from .cda import CDAIE from .cellebrite import CellebriteIE from .ceskatelevize import CeskaTelevizeIE from .cgtn import CGTNIE from .charlierose import CharlieRoseIE from .chaturbate import ChaturbateIE from .chilloutzone import ChilloutzoneIE from .chzzk import ( CHZZKLiveIE, CHZZKVideoIE, ) from .cinemax import CinemaxIE from .cinetecamilano import CinetecaMilanoIE from .cineverse import ( CineverseDetailsIE, CineverseIE, ) from .ciscolive import ( CiscoLiveSearchIE, CiscoLiveSessionIE, ) from .ciscowebex import CiscoWebexIE from .cjsw import CJSWIE from .clipchamp import ClipchampIE from .clippit import ClippitIE from .cliprs import ClipRsIE from .closertotruth import CloserToTruthIE from .cloudflarestream import CloudflareStreamIE from .cloudycdn import CloudyCDNIE from .clubic import ClubicIE from .clyp import ClypIE from .cmt import CMTIE from .cnbc import CNBCVideoIE from .cnn import ( CNNIE, CNNArticleIE, CNNBlogsIE, CNNIndonesiaIE, ) from .comedycentral import ( ComedyCentralIE, ComedyCentralTVIE, ) from .commonmistakes import ( BlobIE, CommonMistakesIE, UnicodeBOMIE, ) from .commonprotocols import ( MmsIE, RtmpIE, ViewSourceIE, ) from .condenast import CondeNastIE from .contv import CONtvIE from .corus import CorusIE from .coub import CoubIE from .cozytv import CozyTVIE from .cpac import ( CPACIE, CPACPlaylistIE, ) from .cracked import CrackedIE from .crackle import CrackleIE from .craftsy import CraftsyIE from .crooksandliars import CrooksAndLiarsIE from .crowdbunker import ( CrowdBunkerChannelIE, CrowdBunkerIE, ) from .crtvg import CrtvgIE from .crunchyroll import ( CrunchyrollArtistIE, CrunchyrollBetaIE, CrunchyrollBetaShowIE, CrunchyrollMusicIE, ) from .cspan import ( CSpanCongressIE, CSpanIE, ) from .ctsnews import CtsNewsIE from .ctv import CTVIE from .ctvnews import CTVNewsIE from .cultureunplugged import CultureUnpluggedIE from .curiositystream import ( CuriosityStreamCollectionsIE, CuriosityStreamIE, CuriosityStreamSeriesIE, ) from .cwtv import CWTVIE from .cybrary import ( CybraryCourseIE, CybraryIE, ) from .dacast import ( DacastPlaylistIE, DacastVODIE, ) from .dailymail import DailyMailIE from .dailymotion import ( DailymotionIE, DailymotionPlaylistIE, DailymotionSearchIE, DailymotionUserIE, ) from .dailywire import ( DailyWireIE, DailyWirePodcastIE, ) from .damtomo import ( DamtomoRecordIE, DamtomoVideoIE, ) from .dangalplay import ( DangalPlayIE, DangalPlaySeasonIE, ) from .daum import ( DaumClipIE, DaumIE, DaumPlaylistIE, DaumUserIE, ) from .daystar import DaystarClipIE from .dbtv import DBTVIE from .dctp import DctpTvIE from .deezer import ( DeezerAlbumIE, DeezerPlaylistIE, ) from .democracynow import DemocracynowIE from .detik import DetikEmbedIE from .deuxm import ( DeuxMIE, DeuxMNewsIE, ) from .dfb import DFBIE from .dhm import DHMIE from .digitalconcerthall import DigitalConcertHallIE from .digiteka import DigitekaIE from .discogs import DiscogsReleasePlaylistIE from .disney import DisneyIE from .dispeak import DigitallySpeakingIE from .dlf import ( DLFIE, DLFCorpusIE, ) from .dlive import ( DLiveStreamIE, DLiveVODIE, ) from .douyutv import ( DouyuShowIE, DouyuTVIE, ) from .dplay import ( TLCIE, AmHistoryChannelIE, AnimalPlanetIE, CookingChannelIE, DestinationAmericaIE, DiscoveryLifeIE, DiscoveryNetworksDeIE, DiscoveryPlusIE, DiscoveryPlusIndiaIE, 
DiscoveryPlusIndiaShowIE, DiscoveryPlusItalyIE, DiscoveryPlusItalyShowIE, DPlayIE, FoodNetworkIE, GoDiscoveryIE, HGTVDeIE, HGTVUsaIE, InvestigationDiscoveryIE, ScienceChannelIE, TravelChannelIE, ) from .drbonanza import DRBonanzaIE from .dreisat import DreiSatIE from .drooble import DroobleIE from .dropbox import DropboxIE from .dropout import ( DropoutIE, DropoutSeasonIE, ) from .drtuber import DrTuberIE from .drtv import ( DRTVIE, DRTVLiveIE, DRTVSeasonIE, DRTVSeriesIE, ) from .dtube import DTubeIE from .duboku import ( DubokuIE, DubokuPlaylistIE, ) from .dumpert import DumpertIE from .duoplay import DuoplayIE from .dvtv import DVTVIE from .dw import ( DWIE, DWArticleIE, ) from .eagleplatform import ( ClipYouEmbedIE, EaglePlatformIE, ) from .ebaumsworld import EbaumsWorldIE from .ebay import EbayIE from .egghead import ( EggheadCourseIE, EggheadLessonIE, ) from .eighttracks import EightTracksIE from .eitb import EitbIE from .elementorembed import ElementorEmbedIE from .elonet import ElonetIE from .elpais import ElPaisIE from .eltrecetv import ElTreceTVIE from .embedly import EmbedlyIE from .epicon import ( EpiconIE, EpiconSeriesIE, ) from .epidemicsound import EpidemicSoundIE from .eplus import EplusIbIE from .epoch import EpochIE from .eporner import EpornerIE from .erocast import ErocastIE from .eroprofile import ( EroProfileAlbumIE, EroProfileIE, ) from .err import ERRJupiterIE from .ertgr import ( ERTFlixCodenameIE, ERTFlixIE, ERTWebtvEmbedIE, ) from .espn import ( ESPNIE, ESPNArticleIE, ESPNCricInfoIE, FiveThirtyEightIE, WatchESPNIE, ) from .ettutv import EttuTvIE from .europa import ( EuropaIE, EuroParlWebstreamIE, ) from .europeantour import EuropeanTourIE from .eurosport import EurosportIE from .euscreen import EUScreenIE from .expressen import ExpressenIE from .eyedotv import EyedoTVIE from .facebook import ( FacebookAdsIE, FacebookIE, FacebookPluginsVideoIE, FacebookRedirectURLIE, FacebookReelIE, ) from .fancode import ( FancodeLiveIE, FancodeVodIE, ) from .fathom import FathomIE from .faz import FazIE from .fc2 import ( FC2IE, FC2EmbedIE, FC2LiveIE, ) from .fczenit import FczenitIE from .fifa import FifaIE from .filmon import ( FilmOnChannelIE, FilmOnIE, ) from .filmweb import FilmwebIE from .firsttv import FirstTVIE from .fivetv import FiveTVIE from .flextv import FlexTVIE from .flickr import FlickrIE from .floatplane import ( FloatplaneChannelIE, FloatplaneIE, ) from .folketinget import FolketingetIE from .footyroom import FootyRoomIE from .formula1 import Formula1IE from .fourtube import ( FourTubeIE, FuxIE, PornerBrosIE, PornTubeIE, ) from .fox import FOXIE from .fox9 import ( FOX9IE, FOX9NewsIE, ) from .foxnews import ( FoxNewsArticleIE, FoxNewsIE, FoxNewsVideoIE, ) from .foxsports import FoxSportsIE from .fptplay import FptplayIE from .franceinter import FranceInterIE from .francetv import ( FranceTVIE, FranceTVInfoIE, FranceTVSiteIE, ) from .freesound import FreesoundIE from .freespeech import FreespeechIE from .freetv import ( FreeTvIE, FreeTvMoviesIE, ) from .frontendmasters import ( FrontendMastersCourseIE, FrontendMastersIE, FrontendMastersLessonIE, ) from .fujitv import FujiTVFODPlus7IE from .funimation import ( FunimationIE, FunimationPageIE, FunimationShowIE, ) from .funk import FunkIE from .funker530 import Funker530IE from .fuyintv import FuyinTVIE from .gab import ( GabIE, GabTVIE, ) from .gaia import GaiaIE from .gamejolt import ( GameJoltCommunityIE, GameJoltGameIE, GameJoltGameSoundtrackIE, GameJoltIE, GameJoltSearchIE, GameJoltUserIE, ) from .gamespot 
import GameSpotIE from .gamestar import GameStarIE from .gaskrank import GaskrankIE from .gazeta import GazetaIE from .gbnews import GBNewsIE from .gdcvault import GDCVaultIE from .gedidigital import GediDigitalIE from .generic import GenericIE from .genericembeds import ( HTML5MediaEmbedIE, QuotedHTMLIE, ) from .genius import ( GeniusIE, GeniusLyricsIE, ) from .germanupa import GermanupaIE from .getcourseru import ( GetCourseRuIE, GetCourseRuPlayerIE, ) from .gettr import ( GettrIE, GettrStreamingIE, ) from .giantbomb import GiantBombIE from .glide import GlideIE from .globalplayer import ( GlobalPlayerAudioEpisodeIE, GlobalPlayerAudioIE, GlobalPlayerLiveIE, GlobalPlayerLivePlaylistIE, GlobalPlayerVideoIE, ) from .globo import ( GloboArticleIE, GloboIE, ) from .glomex import ( GlomexEmbedIE, GlomexIE, ) from .gmanetwork import GMANetworkVideoIE from .go import GoIE from .godresource import GodResourceIE from .godtube import GodTubeIE from .gofile import GofileIE from .golem import GolemIE from .goodgame import GoodGameIE from .googledrive import ( GoogleDriveFolderIE, GoogleDriveIE, ) from .googlepodcasts import ( GooglePodcastsFeedIE, GooglePodcastsIE, ) from .googlesearch import GoogleSearchIE from .goplay import GoPlayIE from .gopro import GoProIE from .goshgay import GoshgayIE from .gotostage import GoToStageIE from .gputechconf import GPUTechConfIE from .graspop import GraspopIE from .gronkh import ( GronkhFeedIE, GronkhIE, GronkhVodsIE, ) from .groupon import GrouponIE from .harpodeon import HarpodeonIE from .hbo import HBOIE from .hearthisat import HearThisAtIE from .heise import HeiseIE from .hellporno import HellPornoIE from .hgtv import HGTVComShowIE from .hidive import HiDiveIE from .historicfilms import HistoricFilmsIE from .hitrecord import HitRecordIE from .hketv import HKETVIE from .hollywoodreporter import ( HollywoodReporterIE, HollywoodReporterPlaylistIE, ) from .holodex import HolodexIE from .hotnewhiphop import HotNewHipHopIE from .hotstar import ( HotStarIE, HotStarPlaylistIE, HotStarPrefixIE, HotStarSeasonIE, HotStarSeriesIE, ) from .hrefli import HrefLiRedirectIE from .hrfensehen import HRFernsehenIE from .hrti import ( HRTiIE, HRTiPlaylistIE, ) from .hse import ( HSEProductIE, HSEShowIE, ) from .huajiao import HuajiaoIE from .huffpost import HuffPostIE from .hungama import ( HungamaAlbumPlaylistIE, HungamaIE, HungamaSongIE, ) from .huya import ( HuyaLiveIE, HuyaVideoIE, ) from .hypem import HypemIE from .hypergryph import MonsterSirenHypergryphMusicIE from .hytale import HytaleIE from .icareus import IcareusIE from .ichinanalive import ( IchinanaLiveClipIE, IchinanaLiveIE, ) from .idolplus import IdolPlusIE from .ign import ( IGNIE, IGNArticleIE, IGNVideoIE, ) from .iheart import ( IHeartRadioIE, IHeartRadioPodcastIE, ) from .ilpost import IlPostIE from .iltalehti import IltalehtiIE from .imdb import ( ImdbIE, ImdbListIE, ) from .imgur import ( ImgurAlbumIE, ImgurGalleryIE, ImgurIE, ) from .ina import InaIE from .inc import IncIE from .indavideo import IndavideoEmbedIE from .infoq import InfoQIE from .instagram import ( InstagramIE, InstagramIOSIE, InstagramStoryIE, InstagramTagIE, InstagramUserIE, ) from .internazionale import InternazionaleIE from .internetvideoarchive import InternetVideoArchiveIE from .iprima import ( IPrimaCNNIE, IPrimaIE, ) from .iqiyi import ( IqAlbumIE, IqIE, IqiyiIE, ) from .islamchannel import ( IslamChannelIE, IslamChannelSeriesIE, ) from .israelnationalnews import IsraelNationalNewsIE from .itprotv import ( ITProTVCourseIE, ITProTVIE, ) 
from .itv import ( ITVBTCCIE, ITVIE, ) from .ivi import ( IviCompilationIE, IviIE, ) from .ivideon import IvideonIE from .iwara import ( IwaraIE, IwaraPlaylistIE, IwaraUserIE, ) from .ixigua import IxiguaIE from .izlesene import IzleseneIE from .jamendo import ( JamendoAlbumIE, JamendoIE, ) from .japandiet import ( SangiinIE, SangiinInstructionIE, ShugiinItvLiveIE, ShugiinItvLiveRoomIE, ShugiinItvVodIE, ) from .jeuxvideo import JeuxVideoIE from .jiocinema import ( JioCinemaIE, JioCinemaSeriesIE, ) from .jiosaavn import ( JioSaavnAlbumIE, JioSaavnPlaylistIE, JioSaavnSongIE, ) from .joj import JojIE from .joqrag import JoqrAgIE from .jove import JoveIE from .jstream import JStreamIE from .jtbc import ( JTBCIE, JTBCProgramIE, ) from .jwplatform import JWPlatformIE from .kakao import KakaoIE from .kaltura import KalturaIE from .kankanews import KankaNewsIE from .karaoketv import KaraoketvIE from .kelbyone import KelbyOneIE from .khanacademy import ( KhanAcademyIE, KhanAcademyUnitIE, ) from .kick import ( KickClipIE, KickIE, KickVODIE, ) from .kicker import KickerIE from .kickstarter import KickStarterIE from .kika import KikaIE from .kinja import KinjaEmbedIE from .kinopoisk import KinoPoiskIE from .kommunetv import KommunetvIE from .kompas import KompasVideoIE from .koo import KooIE from .krasview import KrasViewIE from .kth import KTHIE from .ku6 import Ku6IE from .kukululive import KukuluLiveIE from .kuwo import ( KuwoAlbumIE, KuwoCategoryIE, KuwoChartIE, KuwoIE, KuwoMvIE, KuwoSingerIE, ) from .la7 import ( LA7IE, LA7PodcastEpisodeIE, LA7PodcastIE, ) from .laracasts import ( LaracastsIE, LaracastsPlaylistIE, ) from .lastfm import ( LastFMIE, LastFMPlaylistIE, LastFMUserIE, ) from .laxarxames import LaXarxaMesIE from .lbry import ( LBRYIE, LBRYChannelIE, LBRYPlaylistIE, ) from .lci import LCIIE from .lcp import ( LcpIE, LcpPlayIE, ) from .learningonscreen import LearningOnScreenIE from .lecture2go import Lecture2GoIE from .lecturio import ( LecturioCourseIE, LecturioDeCourseIE, LecturioIE, ) from .leeco import ( LeIE, LePlaylistIE, LetvCloudIE, ) from .lefigaro import ( LeFigaroVideoEmbedIE, LeFigaroVideoSectionIE, ) from .lego import LEGOIE from .lemonde import LemondeIE from .lenta import LentaIE from .libraryofcongress import LibraryOfCongressIE from .libsyn import LibsynIE from .lifenews import ( LifeEmbedIE, LifeNewsIE, ) from .likee import ( LikeeIE, LikeeUserIE, ) from .limelight import ( LimelightChannelIE, LimelightChannelListIE, LimelightMediaIE, ) from .linkedin import ( LinkedInIE, LinkedInLearningCourseIE, LinkedInLearningIE, ) from .liputan6 import Liputan6IE from .listennotes import ListenNotesIE from .litv import LiTVIE from .livejournal import LiveJournalIE from .livestream import ( LivestreamIE, LivestreamOriginalIE, LivestreamShortenerIE, ) from .livestreamfails import LivestreamfailsIE from .lnk import LnkIE from .loom import ( LoomFolderIE, LoomIE, ) from .lovehomeporn import LoveHomePornIE from .lrt import ( LRTVODIE, LRTStreamIE, ) from .lsm import ( LSMLREmbedIE, LSMLTVEmbedIE, LSMReplayIE, ) from .lumni import LumniIE from .lynda import ( LyndaCourseIE, LyndaIE, ) from .maariv import MaarivIE from .magellantv import MagellanTVIE from .magentamusik import MagentaMusikIE from .mailru import ( MailRuIE, MailRuMusicIE, MailRuMusicSearchIE, ) from .mainstreaming import MainStreamingIE from .mangomolo import ( MangomoloLiveIE, MangomoloVideoIE, ) from .manoto import ( ManotoTVIE, ManotoTVLiveIE, ManotoTVShowIE, ) from .manyvids import ManyVidsIE from .maoritv import 
MaoriTVIE from .markiza import ( MarkizaIE, MarkizaPageIE, ) from .massengeschmacktv import MassengeschmackTVIE from .masters import MastersIE from .matchtv import MatchTVIE from .mbn import MBNIE from .mdr import MDRIE from .medaltv import MedalTVIE from .mediaite import MediaiteIE from .mediaklikk import MediaKlikkIE from .medialaan import MedialaanIE from .mediaset import ( MediasetIE, MediasetShowIE, ) from .mediasite import ( MediasiteCatalogIE, MediasiteIE, MediasiteNamedCatalogIE, ) from .mediastream import ( MediaStreamIE, WinSportsVideoIE, ) from .mediaworksnz import MediaWorksNZVODIE from .medici import MediciIE from .megaphone import MegaphoneIE from .megatvcom import ( MegaTVComEmbedIE, MegaTVComIE, ) from .meipai import MeipaiIE from .melonvod import MelonVODIE from .metacritic import MetacriticIE from .mgtv import MGTVIE from .microsoftembed import ( MicrosoftBuildIE, MicrosoftEmbedIE, MicrosoftLearnEpisodeIE, MicrosoftLearnPlaylistIE, MicrosoftLearnSessionIE, MicrosoftMediusIE, ) from .microsoftstream import MicrosoftStreamIE from .mildom import ( MildomClipIE, MildomIE, MildomUserVodIE, MildomVodIE, ) from .minds import ( MindsChannelIE, MindsGroupIE, MindsIE, ) from .minoto import MinotoIE from .mirrativ import ( MirrativIE, MirrativUserIE, ) from .mirrorcouk import MirrorCoUKIE from .mit import ( OCWMITIE, TechTVMITIE, ) from .mitele import MiTeleIE from .mixch import ( MixchArchiveIE, MixchIE, ) from .mixcloud import ( MixcloudIE, MixcloudPlaylistIE, MixcloudUserIE, ) from .mlb import ( MLBIE, MLBTVIE, MLBArticleIE, MLBVideoIE, ) from .mlssoccer import MLSSoccerIE from .mocha import MochaVideoIE from .mojevideo import MojevideoIE from .mojvideo import MojvideoIE from .monstercat import MonstercatIE from .motherless import ( MotherlessGalleryIE, MotherlessGroupIE, MotherlessIE, MotherlessUploaderIE, ) from .motorsport import MotorsportIE from .moviepilot import MoviepilotIE from .moview import MoviewPlayIE from .moviezine import MoviezineIE from .movingimage import MovingImageIE from .msn import MSNIE from .mtv import ( MTVDEIE, MTVIE, MTVItaliaIE, MTVItaliaProgrammaIE, MTVJapanIE, MTVServicesEmbeddedIE, MTVVideoIE, ) from .muenchentv import MuenchenTVIE from .murrtube import ( MurrtubeIE, MurrtubeUserIE, ) from .museai import MuseAIIE from .musescore import MuseScoreIE from .musicdex import ( MusicdexAlbumIE, MusicdexArtistIE, MusicdexPlaylistIE, MusicdexSongIE, ) from .mx3 import ( Mx3IE, Mx3NeoIE, Mx3VolksmusikIE, ) from .mxplayer import ( MxplayerIE, MxplayerShowIE, ) from .myspace import ( MySpaceAlbumIE, MySpaceIE, ) from .myspass import MySpassIE from .myvideoge import MyVideoGeIE from .myvidster import MyVidsterIE from .mzaalo import MzaaloIE from .n1 import ( N1InfoAssetIE, N1InfoIIE, ) from .nate import ( NateIE, NateProgramIE, ) from .nationalgeographic import ( NationalGeographicTVIE, NationalGeographicVideoIE, ) from .naver import ( NaverIE, NaverLiveIE, NaverNowIE, ) from .nba import ( NBAIE, NBAChannelIE, NBAEmbedIE, NBAWatchCollectionIE, NBAWatchEmbedIE, NBAWatchIE, ) from .nbc import ( NBCIE, NBCNewsIE, NBCOlympicsIE, NBCOlympicsStreamIE, NBCSportsIE, NBCSportsStreamIE, NBCSportsVPlayerIE, NBCStationsIE, ) from .ndr import ( NDRIE, NDREmbedBaseIE, NDREmbedIE, NJoyEmbedIE, NJoyIE, ) from .ndtv import NDTVIE from .nebula import ( NebulaChannelIE, NebulaClassIE, NebulaIE, NebulaSubscriptionsIE, ) from .nekohacker import NekoHackerIE from .nerdcubed import NerdCubedFeedIE from .neteasemusic import ( NetEaseMusicAlbumIE, NetEaseMusicDjRadioIE, NetEaseMusicIE, 
NetEaseMusicListIE, NetEaseMusicMvIE, NetEaseMusicProgramIE, NetEaseMusicSingerIE, ) from .netverse import ( NetverseIE, NetversePlaylistIE, NetverseSearchIE, ) from .netzkino import NetzkinoIE from .newgrounds import ( NewgroundsIE, NewgroundsPlaylistIE, NewgroundsUserIE, ) from .newspicks import NewsPicksIE from .newsy import NewsyIE from .nextmedia import ( AppleDailyIE, NextMediaActionNewsIE, NextMediaIE, NextTVIE, ) from .nexx import ( NexxEmbedIE, NexxIE, ) from .nfb import ( NFBIE, NFBSeriesIE, ) from .nfhsnetwork import NFHSNetworkIE from .nfl import ( NFLIE, NFLArticleIE, NFLPlusEpisodeIE, NFLPlusReplayIE, ) from .nhk import ( NhkForSchoolBangumiIE, NhkForSchoolProgramListIE, NhkForSchoolSubjectIE, NhkRadioNewsPageIE, NhkRadiruIE, NhkRadiruLiveIE, NhkVodIE, NhkVodProgramIE, ) from .nhl import NHLIE from .nick import ( NickBrIE, NickDeIE, NickIE, NickRuIE, ) from .niconico import ( NiconicoHistoryIE, NiconicoIE, NiconicoLiveIE, NiconicoPlaylistIE, NiconicoSeriesIE, NiconicoUserIE, NicovideoSearchDateIE, NicovideoSearchIE, NicovideoSearchURLIE, NicovideoTagURLIE, ) from .niconicochannelplus import ( NiconicoChannelPlusChannelLivesIE, NiconicoChannelPlusChannelVideosIE, NiconicoChannelPlusIE, ) from .ninaprotocol import NinaProtocolIE from .ninecninemedia import ( CPTwentyFourIE, NineCNineMediaIE, ) from .ninegag import NineGagIE from .ninenews import NineNewsIE from .ninenow import NineNowIE from .nintendo import NintendoIE from .nitter import NitterIE from .nobelprize import NobelPrizeIE from .noice import NoicePodcastIE from .nonktube import NonkTubeIE from .noodlemagazine import NoodleMagazineIE from .noovo import NoovoIE from .nosnl import NOSNLArticleIE from .nova import ( NovaEmbedIE, NovaIE, ) from .novaplay import NovaPlayIE from .nowness import ( NownessIE, NownessPlaylistIE, NownessSeriesIE, ) from .noz import NozIE from .npo import ( NPOIE, VPROIE, WNLIE, AndereTijdenIE, HetKlokhuisIE, NPOLiveIE, NPORadioFragmentIE, NPORadioIE, SchoolTVIE, ) from .npr import NprIE from .nrk import ( NRKIE, NRKTVIE, NRKPlaylistIE, NRKRadioPodkastIE, NRKSkoleIE, NRKTVDirekteIE, NRKTVEpisodeIE, NRKTVEpisodesIE, NRKTVSeasonIE, NRKTVSeriesIE, ) from .nrl import NRLTVIE from .nts import NTSLiveIE from .ntvcojp import NTVCoJpCUIE from .ntvde import NTVDeIE from .ntvru import NTVRuIE from .nubilesporn import NubilesPornIE from .nuum import ( NuumLiveIE, NuumMediaIE, NuumTabIE, ) from .nuvid import NuvidIE from .nytimes import ( NYTimesArticleIE, NYTimesCookingIE, NYTimesCookingRecipeIE, NYTimesIE, ) from .nzherald import NZHeraldIE from .nzonscreen import NZOnScreenIE from .nzz import NZZIE from .odkmedia import OnDemandChinaEpisodeIE from .odnoklassniki import OdnoklassnikiIE from .oftv import ( OfTVIE, OfTVPlaylistIE, ) from .oktoberfesttv import OktoberfestTVIE from .olympics import OlympicsReplayIE from .on24 import On24IE from .ondemandkorea import ( OnDemandKoreaIE, OnDemandKoreaProgramIE, ) from .onefootball import OneFootballIE from .onenewsnz import OneNewsNZIE from .oneplace import OnePlacePodcastIE from .onet import ( OnetChannelIE, OnetIE, OnetMVPIE, OnetPlIE, ) from .onionstudios import OnionStudiosIE from .opencast import ( OpencastIE, OpencastPlaylistIE, ) from .openrec import ( OpenRecCaptureIE, OpenRecIE, OpenRecMovieIE, ) from .ora import OraTVIE from .orf import ( ORFIPTVIE, ORFONIE, ORFFM4StoryIE, ORFPodcastIE, ORFRadioIE, ) from .outsidetv import OutsideTVIE from .owncloud import OwnCloudIE from .packtpub import ( PacktPubCourseIE, PacktPubIE, ) from .palcomp3 import ( 
PalcoMP3ArtistIE, PalcoMP3IE, PalcoMP3VideoIE, ) from .panopto import ( PanoptoIE, PanoptoListIE, PanoptoPlaylistIE, ) from .paramountplus import ( ParamountPlusIE, ParamountPlusSeriesIE, ) from .parler import ParlerIE from .parlview import ParlviewIE from .patreon import ( PatreonCampaignIE, PatreonIE, ) from .pbs import ( PBSIE, PBSKidsIE, ) from .pearvideo import PearVideoIE from .peekvids import ( PeekVidsIE, PlayVidsIE, ) from .peertube import ( PeerTubeIE, PeerTubePlaylistIE, ) from .peertv import PeerTVIE from .peloton import ( PelotonIE, PelotonLiveIE, ) from .performgroup import PerformGroupIE from .periscope import ( PeriscopeIE, PeriscopeUserIE, ) from .pgatour import PGATourIE from .philharmoniedeparis import PhilharmonieDeParisIE from .phoenix import PhoenixIE from .photobucket import PhotobucketIE from .piapro import PiaproIE from .piaulizaportal import PIAULIZAPortalIE from .picarto import ( PicartoIE, PicartoVodIE, ) from .piksel import PikselIE from .pinkbike import PinkbikeIE from .pinterest import ( PinterestCollectionIE, PinterestIE, ) from .pixivsketch import ( PixivSketchIE, PixivSketchUserIE, ) from .pladform import PladformIE from .planetmarathi import PlanetMarathiIE from .platzi import ( PlatziCourseIE, PlatziIE, ) from .playplustv import PlayPlusTVIE from .playsuisse import PlaySuisseIE from .playtvak import PlaytvakIE from .playwire import PlaywireIE from .pluralsight import ( PluralsightCourseIE, PluralsightIE, ) from .plutotv import PlutoTVIE from .podbayfm import ( PodbayFMChannelIE, PodbayFMIE, ) from .podchaser import PodchaserIE from .podomatic import PodomaticIE from .pokemon import ( PokemonIE, PokemonWatchIE, ) from .pokergo import ( PokerGoCollectionIE, PokerGoIE, ) from .polsatgo import PolsatGoIE from .polskieradio import ( PolskieRadioAuditionIE, PolskieRadioCategoryIE, PolskieRadioIE, PolskieRadioLegacyIE, PolskieRadioPlayerIE, PolskieRadioPodcastIE, PolskieRadioPodcastListIE, ) from .popcorntimes import PopcorntimesIE from .popcorntv import PopcornTVIE from .pornbox import PornboxIE from .pornflip import PornFlipIE from .pornhub import ( PornHubIE, PornHubPagedVideoListIE, PornHubPlaylistIE, PornHubUserIE, PornHubUserVideosUploadIE, ) from .pornotube import PornotubeIE from .pornovoisines import PornoVoisinesIE from .pornoxo import PornoXOIE from .pr0gramm import Pr0grammIE from .prankcast import ( PrankCastIE, PrankCastPostIE, ) from .premiershiprugby import PremiershipRugbyIE from .presstv import PressTVIE from .projectveritas import ProjectVeritasIE from .prosiebensat1 import ProSiebenSat1IE from .prx import ( PRXAccountIE, PRXSeriesIE, PRXSeriesSearchIE, PRXStoriesSearchIE, PRXStoryIE, ) from .puhutv import ( PuhuTVIE, PuhuTVSerieIE, ) from .puls4 import Puls4IE from .pyvideo import PyvideoIE from .qdance import QDanceIE from .qingting import QingTingIE from .qqmusic import ( QQMusicAlbumIE, QQMusicIE, QQMusicPlaylistIE, QQMusicSingerIE, QQMusicToplistIE, QQMusicVideoIE, ) from .r7 import ( R7IE, R7ArticleIE, ) from .radiko import ( RadikoIE, RadikoRadioIE, ) from .radiocanada import ( RadioCanadaAudioVideoIE, RadioCanadaIE, ) from .radiocomercial import ( RadioComercialIE, RadioComercialPlaylistIE, ) from .radiode import RadioDeIE from .radiofrance import ( FranceCultureIE, RadioFranceIE, RadioFranceLiveIE, RadioFrancePodcastIE, RadioFranceProfileIE, RadioFranceProgramScheduleIE, ) from .radiojavan import RadioJavanIE from .radiokapital import ( RadioKapitalIE, RadioKapitalShowIE, ) from .radiozet import RadioZetPodcastIE from .radlive import 
( RadLiveChannelIE, RadLiveIE, RadLiveSeasonIE, ) from .rai import ( RaiCulturaIE, RaiIE, RaiNewsIE, RaiPlayIE, RaiPlayLiveIE, RaiPlayPlaylistIE, RaiPlaySoundIE, RaiPlaySoundLiveIE, RaiPlaySoundPlaylistIE, RaiSudtirolIE, ) from .raywenderlich import ( RayWenderlichCourseIE, RayWenderlichIE, ) from .rbgtum import ( RbgTumCourseIE, RbgTumIE, RbgTumNewCourseIE, ) from .rcs import ( RCSIE, RCSEmbedsIE, RCSVariousIE, ) from .rcti import ( RCTIPlusIE, RCTIPlusSeriesIE, RCTIPlusTVIE, ) from .rds import RDSIE from .redbee import ( RTBFIE, ParliamentLiveUKIE, ) from .redbulltv import ( RedBullEmbedIE, RedBullIE, RedBullTVIE, RedBullTVRrnContentIE, ) from .reddit import RedditIE from .redge import RedCDNLivxIE from .redgifs import ( RedGifsIE, RedGifsSearchIE, RedGifsUserIE, ) from .redtube import RedTubeIE from .rentv import ( RENTVIE, RENTVArticleIE, ) from .restudy import RestudyIE from .reuters import ReutersIE from .reverbnation import ReverbNationIE from .rheinmaintv import RheinMainTVIE from .ridehome import RideHomeIE from .rinsefm import ( RinseFMArtistPlaylistIE, RinseFMIE, ) from .rmcdecouverte import RMCDecouverteIE from .rockstargames import RockstarGamesIE from .rokfin import ( RokfinChannelIE, RokfinIE, RokfinSearchIE, RokfinStackIE, ) from .roosterteeth import ( RoosterTeethIE, RoosterTeethSeriesIE, ) from .rottentomatoes import RottenTomatoesIE from .rozhlas import ( MujRozhlasIE, RozhlasIE, RozhlasVltavaIE, ) from .rte import ( RteIE, RteRadioIE, ) from .rtl2 import RTL2IE from .rtlnl import ( RTLLuArticleIE, RTLLuLiveIE, RTLLuRadioIE, RTLLuTeleVODIE, RtlNlIE, ) from .rtnews import ( RTDocumentryIE, RTDocumentryPlaylistIE, RTNewsIE, RuptlyIE, ) from .rtp import RTPIE from .rtrfm import RTRFMIE from .rts import RTSIE from .rtvcplay import ( RTVCKalturaIE, RTVCPlayEmbedIE, RTVCPlayIE, ) from .rtve import ( RTVEALaCartaIE, RTVEAudioIE, RTVEInfantilIE, RTVELiveIE, RTVETelevisionIE, ) from .rtvs import RTVSIE from .rtvslo import ( RTVSLOIE, RTVSLOShowIE, ) from .rudovideo import RudoVideoIE from .rule34video import Rule34VideoIE from .rumble import ( RumbleChannelIE, RumbleEmbedIE, RumbleIE, ) from .rutube import ( RutubeChannelIE, RutubeEmbedIE, RutubeIE, RutubeMovieIE, RutubePersonIE, RutubePlaylistIE, RutubeTagsIE, ) from .rutv import RUTVIE from .ruutu import RuutuIE from .ruv import ( RuvIE, RuvSpilaIE, ) from .s4c import ( S4CIE, S4CSeriesIE, ) from .safari import ( SafariApiIE, SafariCourseIE, SafariIE, ) from .saitosan import SaitosanIE from .samplefocus import SampleFocusIE from .sapo import SapoIE from .sbs import SBSIE from .sbscokr import ( SBSCoKrAllvodProgramIE, SBSCoKrIE, SBSCoKrProgramsVodIE, ) from .screen9 import Screen9IE from .screencast import ScreencastIE from .screencastify import ScreencastifyIE from .screencastomatic import ScreencastOMaticIE from .screenrec import ScreenRecIE from .scrippsnetworks import ( ScrippsNetworksIE, ScrippsNetworksWatchIE, ) from .scrolller import ScrolllerIE from .scte import ( SCTEIE, SCTECourseIE, ) from .sejmpl import SejmIE from .sen import SenIE from .senalcolombia import SenalColombiaLiveIE from .senategov import ( SenateGovIE, SenateISVPIE, ) from .sendtonews import SendtoNewsIE from .servus import ServusIE from .sevenplus import SevenPlusIE from .sexu import SexuIE from .seznamzpravy import ( SeznamZpravyArticleIE, SeznamZpravyIE, ) from .shahid import ( ShahidIE, ShahidShowIE, ) from .sharepoint import SharePointIE from .sharevideos import ShareVideosEmbedIE from .shemaroome import ShemarooMeIE from .showroomlive import 
ShowRoomLiveIE from .sibnet import SibnetEmbedIE from .simplecast import ( SimplecastEpisodeIE, SimplecastIE, SimplecastPodcastIE, ) from .sina import SinaIE from .sixplay import SixPlayIE from .skeb import SkebIE from .sky import ( SkyNewsIE, SkyNewsStoryIE, SkySportsIE, SkySportsNewsIE, ) from .skyit import ( CieloTVItIE, SkyItArteIE, SkyItIE, SkyItPlayerIE, SkyItVideoIE, SkyItVideoLiveIE, TV8ItIE, ) from .skylinewebcams import SkylineWebcamsIE from .skynewsarabia import ( SkyNewsArabiaArticleIE, SkyNewsArabiaIE, ) from .skynewsau import SkyNewsAUIE from .slideshare import SlideshareIE from .slideslive import SlidesLiveIE from .slutload import SlutloadIE from .smotrim import SmotrimIE from .snapchat import SnapchatSpotlightIE from .snotr import SnotrIE from .sohu import ( SohuIE, SohuVIE, ) from .sonyliv import ( SonyLIVIE, SonyLIVSeriesIE, ) from .soundcloud import ( SoundcloudEmbedIE, SoundcloudIE, SoundcloudPlaylistIE, SoundcloudRelatedIE, SoundcloudSearchIE, SoundcloudSetIE, SoundcloudTrackStationIE, SoundcloudUserIE, SoundcloudUserPermalinkIE, ) from .soundgasm import ( SoundgasmIE, SoundgasmProfileIE, ) from .southpark import ( SouthParkDeIE, SouthParkDkIE, SouthParkEsIE, SouthParkIE, SouthParkLatIE, SouthParkNlIE, ) from .sovietscloset import ( SovietsClosetIE, SovietsClosetPlaylistIE, ) from .spankbang import ( SpankBangIE, SpankBangPlaylistIE, ) from .spiegel import SpiegelIE from .spike import ( BellatorIE, ParamountNetworkIE, ) from .sport5 import Sport5IE from .sportbox import SportBoxIE from .sportdeutschland import SportDeutschlandIE from .spotify import ( SpotifyIE, SpotifyShowIE, ) from .spreaker import ( SpreakerIE, SpreakerPageIE, SpreakerShowIE, SpreakerShowPageIE, ) from .springboardplatform import SpringboardPlatformIE from .sprout import SproutIE from .sproutvideo import ( SproutVideoIE, VidsIoIE, ) from .srgssr import ( SRGSSRIE, SRGSSRPlayIE, ) from .srmediathek import SRMediathekIE from .stacommu import ( StacommuLiveIE, StacommuVODIE, TheaterComplexTownPPVIE, TheaterComplexTownVODIE, ) from .stageplus import StagePlusVODConcertIE from .stanfordoc import StanfordOpenClassroomIE from .startrek import StarTrekIE from .startv import StarTVIE from .steam import ( SteamCommunityBroadcastIE, SteamIE, ) from .stitcher import ( StitcherIE, StitcherShowIE, ) from .storyfire import ( StoryFireIE, StoryFireSeriesIE, StoryFireUserIE, ) from .streamable import StreamableIE from .streamcz import StreamCZIE from .streetvoice import StreetVoiceIE from .stretchinternet import StretchInternetIE from .stripchat import StripchatIE from .stv import STVPlayerIE from .substack import SubstackIE from .sunporno import SunPornoIE from .sverigesradio import ( SverigesRadioEpisodeIE, SverigesRadioPublicationIE, ) from .svt import ( SVTIE, SVTPageIE, SVTPlayIE, SVTSeriesIE, ) from .swearnet import SwearnetEpisodeIE from .syfy import SyfyIE from .syvdk import SYVDKIE from .sztvhu import SztvHuIE from .tagesschau import TagesschauIE from .taptap import ( TapTapAppIE, TapTapAppIntlIE, TapTapMomentIE, TapTapPostIntlIE, ) from .tass import TassIE from .tbs import TBSIE from .tbsjp import ( TBSJPEpisodeIE, TBSJPPlaylistIE, TBSJPProgramIE, ) from .teachable import ( TeachableCourseIE, TeachableIE, ) from .teachertube import ( TeacherTubeIE, TeacherTubeUserIE, ) from .teachingchannel import TeachingChannelIE from .teamcoco import ( ConanClassicIE, TeamcocoIE, ) from .teamtreehouse import TeamTreeHouseIE from .ted import ( TedEmbedIE, TedPlaylistIE, TedSeriesIE, TedTalkIE, ) from .tele5 import 
Tele5IE from .tele13 import Tele13IE from .telebruxelles import TeleBruxellesIE from .telecaribe import TelecaribePlayIE from .telecinco import TelecincoIE from .telegraaf import TelegraafIE from .telegram import TelegramEmbedIE from .telemb import TeleMBIE from .telemundo import TelemundoIE from .telequebec import ( TeleQuebecEmissionIE, TeleQuebecIE, TeleQuebecLiveIE, TeleQuebecSquatIE, TeleQuebecVideoIE, ) from .teletask import TeleTaskIE from .telewebion import TelewebionIE from .tempo import ( IVXPlayerIE, TempoIE, ) from .tencent import ( IflixEpisodeIE, IflixSeriesIE, VQQSeriesIE, VQQVideoIE, WeTvEpisodeIE, WeTvSeriesIE, ) from .tennistv import TennisTVIE from .tenplay import ( TenPlayIE, TenPlaySeasonIE, ) from .testurl import TestURLIE from .tf1 import TF1IE from .tfo import TFOIE from .theguardian import ( TheGuardianPodcastIE, TheGuardianPodcastPlaylistIE, ) from .theholetv import TheHoleTvIE from .theintercept import TheInterceptIE from .theplatform import ( ThePlatformFeedIE, ThePlatformIE, ) from .thestar import TheStarIE from .thesun import TheSunIE from .theweatherchannel import TheWeatherChannelIE from .thisamericanlife import ThisAmericanLifeIE from .thisoldhouse import ThisOldHouseIE from .thisvid import ( ThisVidIE, ThisVidMemberIE, ThisVidPlaylistIE, ) from .threeqsdn import ThreeQSDNIE from .threespeak import ( ThreeSpeakIE, ThreeSpeakUserIE, ) from .tiktok import ( DouyinIE, TikTokCollectionIE, TikTokEffectIE, TikTokIE, TikTokLiveIE, TikTokSoundIE, TikTokTagIE, TikTokUserIE, TikTokVMIE, ) from .tmz import TMZIE from .tnaflix import ( EMPFlixIE, MovieFapIE, TNAFlixIE, TNAFlixNetworkEmbedIE, ) from .toggle import ( MeWatchIE, ToggleIE, ) from .toggo import ToggoIE from .tonline import TOnlineIE from .toongoggles import ToonGogglesIE from .toutv import TouTvIE from .toypics import ( ToypicsIE, ToypicsUserIE, ) from .traileraddict import TrailerAddictIE from .triller import ( TrillerIE, TrillerShortIE, TrillerUserIE, ) from .trovo import ( TrovoChannelClipIE, TrovoChannelVodIE, TrovoIE, TrovoVodIE, ) from .trtcocuk import TrtCocukVideoIE from .trtworld import TrtWorldIE from .trueid import TrueIDIE from .trunews import TruNewsIE from .truth import TruthIE from .trutv import TruTVIE from .tube8 import Tube8IE from .tubetugraz import ( TubeTuGrazIE, TubeTuGrazSeriesIE, ) from .tubitv import ( TubiTvIE, TubiTvShowIE, ) from .tumblr import TumblrIE from .tunein import ( TuneInPodcastEpisodeIE, TuneInPodcastIE, TuneInShortenerIE, TuneInStationIE, ) from .tv2 import ( TV2IE, KatsomoIE, MTVUutisetArticleIE, TV2ArticleIE, ) from .tv2dk import ( TV2DKIE, TV2DKBornholmPlayIE, ) from .tv2hu import ( TV2HuIE, TV2HuSeriesIE, ) from .tv4 import TV4IE from .tv5mondeplus import TV5MondePlusIE from .tv5unis import ( TV5UnisIE, TV5UnisVideoIE, ) from .tv24ua import TV24UAVideoIE from .tva import TVAIE from .tvanouvelles import ( TVANouvellesArticleIE, TVANouvellesIE, ) from .tvc import ( TVCIE, TVCArticleIE, ) from .tver import TVerIE from .tvigle import TvigleIE from .tviplayer import TVIPlayerIE from .tvland import TVLandIE from .tvn24 import TVN24IE from .tvnoe import TVNoeIE from .tvopengr import ( TVOpenGrEmbedIE, TVOpenGrWatchIE, ) from .tvp import ( TVPIE, TVPEmbedIE, TVPStreamIE, TVPVODSeriesIE, TVPVODVideoIE, ) from .tvplay import ( TVPlayHomeIE, TVPlayIE, ) from .tvplayer import TVPlayerIE from .tweakers import TweakersIE from .twentymin import TwentyMinutenIE from .twentythreevideo import TwentyThreeVideoIE from .twitcasting import ( TwitCastingIE, TwitCastingLiveIE, 
TwitCastingUserIE, ) from .twitch import ( TwitchClipsIE, TwitchCollectionIE, TwitchStreamIE, TwitchVideosClipsIE, TwitchVideosCollectionsIE, TwitchVideosIE, TwitchVodIE, ) from .twitter import ( TwitterAmplifyIE, TwitterBroadcastIE, TwitterCardIE, TwitterIE, TwitterShortenerIE, TwitterSpacesIE, ) from .txxx import ( PornTopIE, TxxxIE, ) from .udemy import ( UdemyCourseIE, UdemyIE, ) from .udn import UDNEmbedIE from .ufctv import ( UFCTVIE, UFCArabiaIE, ) from .ukcolumn import UkColumnIE from .uktvplay import UKTVPlayIE from .umg import UMGDeIE from .unistra import UnistraIE from .unity import UnityIE from .unsupported import ( KnownDRMIE, KnownPiracyIE, ) from .uol import UOLIE from .uplynk import ( UplynkIE, UplynkPreplayIE, ) from .urort import UrortIE from .urplay import URPlayIE from .usanetwork import USANetworkIE from .usatoday import USATodayIE from .ustream import ( UstreamChannelIE, UstreamIE, ) from .ustudio import ( UstudioEmbedIE, UstudioIE, ) from .utreon import UtreonIE from .varzesh3 import Varzesh3IE from .vbox7 import Vbox7IE from .veo import VeoIE from .veoh import ( VeohIE, VeohUserIE, ) from .vesti import VestiIE from .vevo import ( VevoIE, VevoPlaylistIE, ) from .vgtv import ( VGTVIE, BTArticleIE, BTVestlendingenIE, ) from .vh1 import VH1IE from .vice import ( ViceArticleIE, ViceIE, ViceShowIE, ) from .viddler import ViddlerIE from .videa import VideaIE from .videocampus_sachsen import ( VideocampusSachsenIE, ViMPPlaylistIE, ) from .videodetective import VideoDetectiveIE from .videofyme import VideofyMeIE from .videoken import ( VideoKenCategoryIE, VideoKenIE, VideoKenPlayerIE, VideoKenPlaylistIE, VideoKenTopicIE, ) from .videomore import ( VideomoreIE, VideomoreSeasonIE, VideomoreVideoIE, ) from .videopress import VideoPressIE from .vidflex import VidflexIE from .vidio import ( VidioIE, VidioLiveIE, VidioPremierIE, ) from .vidlii import VidLiiIE from .vidly import VidlyIE from .vidyard import VidyardIE from .viewlift import ( ViewLiftEmbedIE, ViewLiftIE, ) from .viidea import ViideaIE from .viki import ( VikiChannelIE, VikiIE, ) from .vimeo import ( VHXEmbedIE, VimeoAlbumIE, VimeoChannelIE, VimeoGroupsIE, VimeoIE, VimeoLikesIE, VimeoOndemandIE, VimeoProIE, VimeoReviewIE, VimeoUserIE, VimeoWatchLaterIE, ) from .vimm import ( VimmIE, VimmRecordingIE, ) from .vine import ( VineIE, VineUserIE, ) from .viously import ViouslyIE from .viqeo import ViqeoIE from .viu import ( ViuIE, ViuOTTIE, ViuOTTIndonesiaIE, ViuPlaylistIE, ) from .vk import ( VKIE, VKPlayIE, VKPlayLiveIE, VKUserVideosIE, VKWallPostIE, ) from .vocaroo import VocarooIE from .vodpl import VODPlIE from .vodplatform import VODPlatformIE from .voicy import ( VoicyChannelIE, VoicyIE, ) from .volejtv import VolejTVIE from .voxmedia import ( VoxMediaIE, VoxMediaVolumeIE, ) from .vrt import ( VRTIE, DagelijkseKostIE, KetnetIE, Radio1BeIE, VrtNUIE, ) from .vtm import VTMIE from .vtv import ( VTVIE, VTVGoIE, ) from .vuclip import VuClipIE from .vvvvid import ( VVVVIDIE, VVVVIDShowIE, ) from .walla import WallaIE from .washingtonpost import ( WashingtonPostArticleIE, WashingtonPostIE, ) from .wat import WatIE from .wdr import ( WDRIE, WDRElefantIE, WDRMobileIE, WDRPageIE, ) from .webcamerapl import WebcameraplIE from .webcaster import ( WebcasterFeedIE, WebcasterIE, ) from .webofstories import ( WebOfStoriesIE, WebOfStoriesPlaylistIE, ) from .weibo import ( WeiboIE, WeiboUserIE, WeiboVideoIE, ) from .weiqitv import WeiqiTVIE from .weverse import ( WeverseIE, WeverseLiveIE, WeverseLiveTabIE, WeverseMediaIE, 
WeverseMediaTabIE, WeverseMomentIE, ) from .wevidi import WeVidiIE from .weyyak import WeyyakIE from .whowatch import WhoWatchIE from .whyp import WhypIE from .wikimedia import WikimediaIE from .wimbledon import WimbledonIE from .wimtv import WimTVIE from .wistia import ( WistiaChannelIE, WistiaIE, WistiaPlaylistIE, ) from .wordpress import ( WordpressMiniAudioPlayerEmbedIE, WordpressPlaylistEmbedIE, ) from .worldstarhiphop import WorldStarHipHopIE from .wppilot import ( WPPilotChannelsIE, WPPilotIE, ) from .wrestleuniverse import ( WrestleUniversePPVIE, WrestleUniverseVODIE, ) from .wsj import ( WSJIE, WSJArticleIE, ) from .wwe import WWEIE from .wykop import ( WykopDigCommentIE, WykopDigIE, WykopPostCommentIE, WykopPostIE, ) from .xanimu import XanimuIE from .xboxclips import XboxClipsIE from .xhamster import ( XHamsterEmbedIE, XHamsterIE, XHamsterUserIE, ) from .xiaohongshu import XiaoHongShuIE from .ximalaya import ( XimalayaAlbumIE, XimalayaIE, ) from .xinpianchang import XinpianchangIE from .xminus import XMinusIE from .xnxx import XNXXIE from .xstream import XstreamIE from .xvideos import ( XVideosIE, XVideosQuickiesIE, ) from .xxxymovies import XXXYMoviesIE from .yahoo import ( YahooIE, YahooJapanNewsIE, YahooSearchIE, ) from .yandexdisk import YandexDiskIE from .yandexmusic import ( YandexMusicAlbumIE, YandexMusicArtistAlbumsIE, YandexMusicArtistTracksIE, YandexMusicPlaylistIE, YandexMusicTrackIE, ) from .yandexvideo import ( YandexVideoIE, YandexVideoPreviewIE, ZenYandexChannelIE, ZenYandexIE, ) from .yapfiles import YapFilesIE from .yappy import ( YappyIE, YappyProfileIE, ) from .yle_areena import YleAreenaIE from .youjizz import YouJizzIE from .youku import ( YoukuIE, YoukuShowIE, ) from .younow import ( YouNowChannelIE, YouNowLiveIE, YouNowMomentIE, ) from .youporn import ( YouPornCategoryIE, YouPornChannelIE, YouPornCollectionIE, YouPornIE, YouPornStarIE, YouPornTagIE, YouPornVideosIE, ) from .zaiko import ( ZaikoETicketIE, ZaikoIE, ) from .zapiks import ZapiksIE from .zattoo import ( BBVTVIE, EWETVIE, SAKTVIE, VTXTVIE, BBVTVLiveIE, BBVTVRecordingsIE, EinsUndEinsTVIE, EinsUndEinsTVLiveIE, EinsUndEinsTVRecordingsIE, EWETVLiveIE, EWETVRecordingsIE, GlattvisionTVIE, GlattvisionTVLiveIE, GlattvisionTVRecordingsIE, MNetTVIE, MNetTVLiveIE, MNetTVRecordingsIE, NetPlusTVIE, NetPlusTVLiveIE, NetPlusTVRecordingsIE, OsnatelTVIE, OsnatelTVLiveIE, OsnatelTVRecordingsIE, QuantumTVIE, QuantumTVLiveIE, QuantumTVRecordingsIE, SAKTVLiveIE, SAKTVRecordingsIE, SaltTVIE, SaltTVLiveIE, SaltTVRecordingsIE, VTXTVLiveIE, VTXTVRecordingsIE, WalyTVIE, WalyTVLiveIE, WalyTVRecordingsIE, ZattooIE, ZattooLiveIE, ZattooMoviesIE, ZattooRecordingsIE, ) from .zdf import ( ZDFIE, ZDFChannelIE, ) from .zee5 import ( Zee5IE, Zee5SeriesIE, ) from .zeenews import ZeeNewsIE from .zenporn import ZenPornIE from .zetland import ZetlandDKArticleIE from .zhihu import ZhihuIE from .zingmp3 import ( ZingMp3AlbumIE, ZingMp3ChartHomeIE, ZingMp3ChartMusicVideoIE, ZingMp3HubIE, ZingMp3IE, ZingMp3LiveRadioIE, ZingMp3PodcastEpisodeIE, ZingMp3PodcastIE, ZingMp3UserIE, ZingMp3WeekChartIE, ) from .zoom import ZoomIE from .zype import ZypeIE yt-dlp-2024.09.27/yt_dlp/extractor/abc.py000066400000000000000000000431731467563447100177700ustar00rootroot00000000000000import hashlib import hmac import re import time from .common import InfoExtractor from ..utils import ( ExtractorError, dict_get, int_or_none, js_to_json, parse_iso8601, str_or_none, traverse_obj, try_get, unescapeHTML, update_url_query, url_or_none, ) class 
ABCIE(InfoExtractor):
    IE_NAME = 'abc.net.au'
    _VALID_URL = r'https?://(?:www\.)?abc\.net\.au/(?:news|btn)/(?:[^/]+/){1,4}(?P<id>\d{5,})'

    _TESTS = [{
        'url': 'http://www.abc.net.au/news/2014-11-05/australia-to-staff-ebola-treatment-centre-in-sierra-leone/5868334',
        'md5': 'cb3dd03b18455a661071ee1e28344d9f',
        'info_dict': {
            'id': '5868334',
            'ext': 'mp4',
            'title': 'Australia to help staff Ebola treatment centre in Sierra Leone',
            'description': 'md5:809ad29c67a05f54eb41f2a105693a67',
        },
        'skip': 'this video has expired',
    }, {
        'url': 'http://www.abc.net.au/news/2015-08-17/warren-entsch-introduces-same-sex-marriage-bill/6702326',
        'md5': '4ebd61bdc82d9a8b722f64f1f4b4d121',
        'info_dict': {
            'id': 'NvqvPeNZsHU',
            'ext': 'mp4',
            'upload_date': '20150816',
            'uploader': 'ABC News (Australia)',
            'description': 'Government backbencher Warren Entsch introduces a cross-party sponsored bill to legalise same-sex marriage, saying the bill is designed to promote "an inclusive Australia, not a divided one.". Read more here: http://ab.co/1Mwc6ef',
            'uploader_id': 'NewsOnABC',
            'title': 'Marriage Equality: Warren Entsch introduces same sex marriage bill',
        },
        'add_ie': ['Youtube'],
        'skip': 'Not accessible from Travis CI server',
    }, {
        'url': 'http://www.abc.net.au/news/2015-10-23/nab-lifts-interest-rates-following-westpac-and-cba/6880080',
        'md5': 'b96eee7c9edf4fc5a358a0252881cc1f',
        'info_dict': {
            'id': '6880080',
            'ext': 'mp3',
            'title': 'NAB lifts interest rates, following Westpac and CBA',
            'description': 'md5:f13d8edc81e462fce4a0437c7dc04728',
        },
    }, {
        'url': 'http://www.abc.net.au/news/2015-10-19/6866214',
        'only_matching': True,
    }, {
        'url': 'https://www.abc.net.au/btn/classroom/wwi-centenary/10527914',
        'info_dict': {
            'id': '10527914',
            'ext': 'mp4',
            'title': 'WWI Centenary',
            'description': 'md5:c2379ec0ca84072e86b446e536954546',
        },
    }, {
        'url': 'https://www.abc.net.au/news/programs/the-world/2020-06-10/black-lives-matter-protests-spawn-support-for/12342074',
        'info_dict': {
            'id': '12342074',
            'ext': 'mp4',
            'title': 'Black Lives Matter protests spawn support for Papuans in Indonesia',
            'description': 'md5:2961a17dc53abc558589ccd0fb8edd6f',
        },
    }, {
        'url': 'https://www.abc.net.au/btn/newsbreak/btn-newsbreak-20200814/12560476',
        'info_dict': {
            'id': 'tDL8Ld4dK_8',
            'ext': 'mp4',
            'title': 'Fortnite Banned From Apple and Google App Stores',
            'description': 'md5:a6df3f36ce8f816b74af4bd6462f5651',
            'upload_date': '20200813',
            'uploader': 'Behind the News',
            'uploader_id': 'behindthenews',
        },
    }, {
        'url': 'https://www.abc.net.au/news/2023-06-25/wagner-boss-orders-troops-back-to-bases-to-avoid-bloodshed/102520540',
        'info_dict': {
            'id': '102520540',
            'title': 'Wagner Group retreating from Russia, leader Prigozhin to move to Belarus',
            'ext': 'mp4',
            'description': 'Wagner troops leave Rostov-on-Don and\xa0Yevgeny Prigozhin will move to Belarus under a deal brokered by Belarusian President Alexander Lukashenko to end the mutiny.',
            'thumbnail': 'https://live-production.wcms.abc-cdn.net.au/0c170f5b57f0105c432f366c0e8e267b?impolicy=wcms_crop_resize&cropH=2813&cropW=5000&xPos=0&yPos=249&width=862&height=485',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        mobj = re.search(r'<a\s+href="(?P<url>[^"]+)"\s+data-duration="\d+"\s+title="Download audio directly">', webpage)
        if mobj:
            urls_info = mobj.groupdict()
            youtube = False
            video = False
        else:
            mobj = re.search(r'External Link:', webpage)
            if mobj is None:
                mobj = re.search(r''
        xml_root = self._html_search_regex(
            PLAYER_REGEX, start_page, 'xml root', default=None)
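        # --- Editor's sketch (illustrative only, not part of the original file).
        # The surrounding method probes for the player config anonymously (the
        # 'xml root' lookup above, with default=None) and only falls back to
        # authenticating when that probe comes back empty (below). Factored
        # out, the retry pattern looks like this; `fetch` and `login` are
        # hypothetical stand-ins for the downloader and login helpers:
        def _retry_with_login(fetch, login, target):
            result = fetch(target)  # optimistic anonymous attempt first
            if result is None:  # expected marker missing -> needs auth
                session = login()  # authenticate once
                # retry with credentials; give up quietly if login failed
                result = fetch(target, session=session) if session else None
            return result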
        if xml_root is None:
            # Probably need to authenticate
            login_res = self._login(webpage_url, display_id)
            if login_res is None:
                self.report_warning('Could not login.')
            else:
                start_page = login_res
                # Grab the url from the authenticated page
                xml_root = self._html_search_regex(
                    PLAYER_REGEX, start_page, 'xml root')

        xml_name = self._html_search_regex(
            r''

        for iframe in re.findall(r'(?s)<iframe(.+?)></iframe>', webpage):
            url = self._search_regex(
                r'src=(["\'])(?P<url>.+?partnerplayer.+?)\1', iframe,
                'player URL', default=None, group='url')
            if url:
                break

        if not url:
            url = self._og_search_url(webpage)

        mobj = re.match(
            self._VALID_URL, self._proto_relative_url(url.strip()))

        player_id = mobj.group('player_id')
        if not display_id:
            display_id = player_id
        if player_id:
            player_page = self._download_webpage(
                url, display_id, note='Downloading player page',
                errnote='Could not download player page')
            video_id = self._search_regex(
                r'


class PBSKidsIE(InfoExtractor):
    _VALID_URL = r'https?://pbskids\.org/video/[\w-]+/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://pbskids.org/video/molly-of-denali/3030407927',
            'md5': '1ded20a017cc6b53446238f1804ce4c7',
            'info_dict': {
                'id': '3030407927',
                'title': 'Bird in the Hand/Bye-Bye Birdie',
                'channel': 'molly-of-denali',
                'duration': 1540,
                'ext': 'mp4',
                'series': 'Molly of Denali',
                'description': 'md5:d006b2211633685d8ebc8d03b6d5611e',
                'categories': ['Episode'],
                'upload_date': '20190718',
            },
        },
        {
            'url': 'https://pbskids.org/video/plum-landing/2365205059',
            'md5': '92e5d189851a64ae1d0237a965be71f5',
            'info_dict': {
                'id': '2365205059',
                'title': 'Cooper\'s Favorite Place in Nature',
                'channel': 'plum-landing',
                'duration': 67,
                'ext': 'mp4',
                'series': 'Plum Landing',
                'description': 'md5:657e5fc4356a84ead1c061eb280ff05d',
                'categories': ['Episode'],
                'upload_date': '20140302',
            },
        },
    ]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        meta = self._search_json(r'window\._PBS_KIDS_DEEPLINK\s*=', webpage, 'video info', video_id)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            traverse_obj(meta, ('video_obj', 'URI', {url_or_none})), video_id, ext='mp4')

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            **traverse_obj(meta, {
                'categories': ('video_obj', 'video_type', {str}, {lambda x: [x] if x else None}),
                'channel': ('show_slug', {str}),
                'description': ('video_obj', 'description', {str}),
                'duration': ('video_obj', 'duration', {int_or_none}),
                'series': ('video_obj', 'program_title', {str}),
                'title': ('video_obj', 'title', {str}),
                'upload_date': ('video_obj', 'air_date', {unified_strdate}),
            }),
        }
yt-dlp-2024.09.27/yt_dlp/extractor/pearvideo.py000066400000000000000000000046761467563447100212240ustar00rootroot00000000000000import re

from .common import InfoExtractor
from ..utils import (
    qualities,
    traverse_obj,
    unified_timestamp,
)


class PearVideoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?pearvideo\.com/video_(?P<id>\d+)'
    _TEST = {
        'url': 'http://www.pearvideo.com/video_1076290',
        'info_dict': {
            'id': '1076290',
            'ext': 'mp4',
            'title': '小浣熊在主人家玻璃上滚石头:没砸',
            'description': 'md5:01d576b747de71be0ee85eb7cac25f9d',
            'timestamp': 1494275280,
            'upload_date': '20170508',
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        quality = qualities(
            ('ldflv', 'ld', 'sdflv', 'sd', 'hdflv', 'hd', 'src'))

        formats = [{
            'url': mobj.group('url'),
            'format_id': mobj.group('id'),
            'quality': quality(mobj.group('id')),
        } for mobj in re.finditer(
            r'(?P<id>[a-zA-Z]+)Url\s*=\s*(["\'])(?P<url>(?:https?:)?//.+?)\2',
            webpage)]

        if not formats:
            info = self._download_json(
                'https://www.pearvideo.com/videoStatus.jsp', video_id=video_id,
                query={'contId': video_id}, headers={'Referer': url})
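        # --- Editor's sketch (illustrative only, not part of the original file).
        # The fallback below rebuilds each media URL from the videoStatus.jsp
        # response: the API stamps its `systemTime` value into the `srcUrl` as
        # an anti-leech token, and substituting `cont-<video id>` for it yields
        # a fetchable URL. In isolation (sample values invented):
        def _swap_antileech_token(src_url, system_time, vid):
            # e.g. '.../1494275280000-10671177-hd.mp4' -> '.../cont-1076290-10671177-hd.mp4'
            return src_url.replace(system_time, f'cont-{vid}')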
            formats = [{
                'format_id': k,
                'url': v.replace(info['systemTime'], f'cont-{video_id}') if k == 'srcUrl' else v,
            } for k, v in traverse_obj(info, ('videoInfo', 'videos'), default={}).items() if v]

        title = self._search_regex(
            (r'<h1[^>]+\bclass=(["\'])video-tt\1[^>]*>(?P<value>[^<]+)',
             r'<[^>]+\bdata-title=(["\'])(?P<value>(?:(?!\1).)+)\1'),
            webpage, 'title', group='value')
        description = self._search_regex(
            (r'<div[^>]+\bclass=(["\'])summary\1[^>]*>(?P<value>[^<]+)',
             r'<[^>]+\bdata-summary=(["\'])(?P<value>(?:(?!\1).)+)\1'),
            webpage, 'description', default=None,
            group='value') or self._html_search_meta('Description', webpage)
        timestamp = unified_timestamp(self._search_regex(
            r'<div[^>]+\bclass=["\']date["\'][^>]*>([^<]+)',
            webpage, 'timestamp', fatal=False))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'formats': formats,
        }
yt-dlp-2024.09.27/yt_dlp/extractor/peekvids.py000066400000000000000000000160561467563447100210550ustar00rootroot00000000000000import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    get_element_by_class,
    int_or_none,
    merge_dicts,
    url_or_none,
)


class PeekVidsBaseIE(InfoExtractor):
    def _real_extract(self, url):
        domain, video_id = self._match_valid_url(url).group('domain', 'id')
        webpage = self._download_webpage(url, video_id, expected_status=429)
        if '>Rate Limit Exceeded' in webpage:
            raise ExtractorError(
                f'You are suspected as a bot. Wait, or pass the captcha on the site and provide cookies. {self._login_hint()}',
                video_id=video_id, expected=True)

        title = self._html_search_regex(r'(?s)<h1\b[^>]*>(.+?)</h1>', webpage, 'title')

        display_id = video_id
        video_id = self._search_regex(r'(?s)<video\b[^>]+\bdata-id\s*=\s*["\']?([\w-]+)', webpage, 'short video ID')
        srcs = self._download_json(
            f'https://www.{domain}/v-alt/{video_id}', video_id,
            note='Downloading list of source files')

        formats = []
        for k, v in srcs.items():
            f_url = url_or_none(v)
            if not f_url:
                continue

            height = self._search_regex(r'^data-src(\d{3,})$', k, 'height', default=None)
            if not height:
                continue

            formats.append({
                'url': f_url,
                'format_id': height,
                'height': int_or_none(height),
            })
        if not formats:
            formats = [{'url': url} for url in srcs.values()]

        info = self._search_json_ld(webpage, video_id, expected_type='VideoObject', default={})
        info.pop('url', None)

        # may not have found the thumbnail if it was in a list in the ld+json
        info.setdefault('thumbnail', self._og_search_thumbnail(webpage))
        detail = (get_element_by_class('detail-video-block', webpage)
                  or get_element_by_class('detail-block', webpage) or '')
        info['description'] = self._html_search_regex(
            rf'(?s)(.+?)(?:{re.escape(info.get("description", ""))}\s*<|<ul\b)',
            detail, 'description', default=None)

        def cat_tags(name, html):
            l = self._html_search_regex(
                rf'(?s)<span\b[^>]*>\s*{re.escape(name)}\s*:\s*</span>(.+?)</li>',
                html, name, default='')
            return list(filter(None, re.split(r'\s+', l)))

        return merge_dicts({
            'id': video_id,
            'display_id': display_id,
            'age_limit': 18,
            'formats': formats,
            'categories': cat_tags('Categories', detail),
            'tags': cat_tags('Tags', detail),
            'uploader': self._html_search_regex(r'[Uu]ploaded\s+by\s(.+?)"', webpage, 'uploader', default=None),
        }, info)
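# --- Editor's sketch (illustrative only, not part of the original file).
# PeekVidsIE and PlayVidsIE below share PeekVidsBaseIE._real_extract verbatim;
# each subclass only supplies a _VALID_URL whose named `domain` and `id`
# groups the base class reads back to build the per-site `/v-alt/` endpoint.
# Reduced to its essentials, the pattern is:
import re  # already imported at the top of this file; repeated so the sketch stands alone


class _SharedScraperBase:
    _VALID_URL = None  # subclasses supply a pattern with (?P<domain>...) and (?P<id>...)

    def _source_list_url(self, url):
        # read the groups back out of whichever pattern the subclass defined
        domain, video_id = re.match(self._VALID_URL, url).group('domain', 'id')
        return f'https://www.{domain}/v-alt/{video_id}'


class _SiteVariant(_SharedScraperBase):
    # hypothetical site; the real classes below use peekvids.com / playvids.com
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>example\.com)/v/(?P<id>[\w-]+)'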
class PeekVidsIE(PeekVidsBaseIE):
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?(?P<domain>peekvids\.com)/
        (?:(?:[^/?#]+/){2}|embed/?\?(?:[^#]*&)?v=)
        (?P<id>[^/?&#]*)
    '''
    _TESTS = [{
        'url': 'https://peekvids.com/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp/BSyLMbN0YCd',
        'md5': '2ff6a357a9717dc9dc9894b51307e9a2',
        'info_dict': {
            'id': '1262717',
            'display_id': 'BSyLMbN0YCd',
            'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
            'timestamp': 1642579329,
            'upload_date': '20220119',
            'duration': 416,
            'view_count': int,
            'age_limit': 18,
            'uploader': 'SEXYhub.com',
            'categories': list,
            'tags': list,
        },
    }]


class PlayVidsIE(PeekVidsBaseIE):
    _VALID_URL = r'https?://(?:www\.)?(?P<domain>playvids\.com)/(?:embed/|\w\w?/)?(?P<id>[^/?#]*)'
    _TESTS = [{
        'url': 'https://www.playvids.com/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
        'md5': '2f12e50213dd65f142175da633c4564c',
        'info_dict': {
            'id': '1978030',
            'display_id': 'U3pBrYhsjXM',
            'title': ' Dane Jones - Cute redhead with perfect tits with Mini Vamp',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:0a61df3620de26c0af8963b1a730cd69',
            'timestamp': 1640435839,
            'upload_date': '20211225',
            'duration': 416,
            'view_count': int,
            'age_limit': 18,
            'uploader': 'SEXYhub.com',
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://www.playvids.com/es/U3pBrYhsjXM/pc/dane-jones-cute-redhead-with-perfect-tits-with-mini-vamp',
        'only_matching': True,
    }, {
        'url': 'https://www.playvids.com/embed/U3pBrYhsjXM',
        'only_matching': True,
    }, {
        'url': 'https://www.playvids.com/bKmGLe3IwjZ/sv/brazzers-800-phone-sex-madison-ivy-always-on-the-line',
        'md5': 'e783986e596cafbf46411a174ab42ba6',
        'info_dict': {
            'id': '762385',
            'display_id': 'bKmGLe3IwjZ',
            'ext': 'mp4',
            'title': 'Brazzers - 1 800 Phone Sex: Madison Ivy Always On The Line 6',
            'description': 'md5:bdcd2db2b8ad85831a491d7c8605dcef',
            'timestamp': 1516958544,
            'upload_date': '20180126',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 480,
            'uploader': 'Brazzers',
            'age_limit': 18,
            'view_count': int,
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://www.playvids.com/v/47iUho33toY',
        'md5': 'b056b5049d34b648c1e86497cf4febce',
        'info_dict': {
            'id': '700621',
            'display_id': '47iUho33toY',
            'ext': 'mp4',
            'title': 'KATEE OWEN STRIPTIASE IN SEXY RED LINGERIE',
            'timestamp': 1507052209,
            'upload_date': '20171003',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 332,
            'uploader': 'Cacerenele',
            'age_limit': 18,
            'view_count': int,
            'categories': list,
            'tags': list,
        },
    }, {
        'url': 'https://www.playvids.com/z3_7iwWCmqt/sexy-teen-filipina-striptease-beautiful-pinay-bargirl-strips-and-dances',
        'md5': 'efa09be9f031314b7b7e3bc6510cd0df',
        'info_dict': {
            'id': '1523518',
            'display_id': 'z3_7iwWCmqt',
            'ext': 'mp4',
            'title': 'SEXY TEEN FILIPINA STRIPTEASE - Beautiful Pinay Bargirl Strips and Dances',
            'timestamp': 1607470323,
            'upload_date': '20201208',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 593,
            'uploader': 'yorours',
            'age_limit': 18,
            'view_count': int,
            'categories': list,
            'tags': list,
        },
    }]
yt-dlp-2024.09.27/yt_dlp/extractor/peertube.py000066400000000000000000002320121467563447100210460ustar00rootroot00000000000000import functools
import re

from .common import InfoExtractor
from ..utils import (
    OnDemandPagedList,
    format_field,
    int_or_none,
    parse_resolution,
    str_or_none,
    try_get,
    unified_timestamp,
    url_or_none,
    urljoin,
)


class PeerTubeIE(InfoExtractor):
    _INSTANCES_RE = r'''(?:
        # Taken from https://instances.joinpeertube.org/instances
0ch\.tv| 3dctube\.3dcandy\.social| all\.electric\.kitchen| alterscope\.fr| anarchy\.tube| apathy\.tv| apertatube\.net| archive\.nocopyrightintended\.tv| archive\.reclaim\.tv| area51\.media| astrotube-ufe\.obspm\.fr| astrotube\.obspm\.fr| audio\.freediverse\.com| azxtube\.youssefc\.tn| bark\.video| battlepenguin\.video| bava\.tv| bee-tube\.fr| beetoons\.tv| biblion\.refchat\.net|
biblioteca\.theowlclub\.net| bideoak\.argia\.eus| bideoteka\.eus| birdtu\.be| bitcointv\.com| bonn\.video| breeze\.tube| brioco\.live| brocosoup\.fr| canal\.facil\.services| canard\.tube| cdn01\.tilvids\.com| celluloid-media\.huma-num\.fr| chicago1\.peertube\.support| cliptube\.org| cloudtube\.ise\.fraunhofer\.de| comf\.tube| comics\.peertube\.biz| commons\.tube| communitymedia\.video| conspiracydistillery\.com| crank\.recoil\.org| dalek\.zone| dalliance\.network| dangly\.parts| darkvapor\.nohost\.me| daschauher\.aksel\.rocks| digitalcourage\.video| displayeurope\.video| ds106\.tv| dud-video\.inf\.tu-dresden\.de| dud175\.inf\.tu-dresden\.de| dytube\.com| ebildungslabor\.video| evangelisch\.video| fair\.tube| fedi\.video| fedimovie\.com| fediverse\.tv| film\.k-prod\.fr| flipboard\.video| foss\.video| fossfarmers\.company| fotogramas\.politicaconciencia\.org| freediverse\.com| freesoto-u2151\.vm\.elestio\.app| freesoto\.tv| garr\.tv| greatview\.video| grypstube\.uni-greifswald\.de| habratube\.site| ilbjach\.ru| infothema\.net| itvplus\.iiens\.net| johnydeep\.net| juggling\.digital| jupiter\.tube| kadras\.live| kino\.kompot\.si| kino\.schuerz\.at| kinowolnosc\.pl| kirche\.peertube-host\.de| kiwi\.froggirl\.club| kodcast\.com| kolektiva\.media| kpop\.22x22\.ru| kumi\.tube| la2\.peertube\.support| la3\.peertube\.support| la4\.peertube\.support| lastbreach\.tv| lawsplaining\.peertube\.biz| leopard\.tube| live\.codinglab\.ch| live\.libratoi\.org| live\.oldskool\.fi| live\.solari\.com| lucarne\.balsamine\.be| luxtube\.lu| makertube\.net| media\.econoalchemist\.com| media\.exo\.cat| media\.fsfe\.org| media\.gzevd\.de| media\.interior\.edu\.uy| media\.krashboyz\.org| media\.mzhd\.de| media\.smz-ma\.de| media\.theplattform\.net| media\.undeadnetwork\.de| medias\.debrouillonet\.org| medias\.pingbase\.net| mediatube\.fermalo\.fr| melsungen\.peertube-host\.de| merci-la-police\.fr| mindlyvideos\.com| mirror\.peertube\.metalbanana\.net| mirrored\.rocks| mix\.video| mountaintown\.video| movies\.metricsmaster\.eu| mtube\.mooo\.com| mytube\.kn-cloud\.de| mytube\.le5emeaxe\.fr| mytube\.madzel\.de| nadajemy\.com| nanawel-peertube\.dyndns\.org| neat\.tube| nethack\.tv| nicecrew\.tv| nightshift\.minnix\.dev| nolog\.media| nyltube\.nylarea\.com| ocfedtest\.hosted\.spacebear\.ee| openmedia\.edunova\.it| p2ptv\.ru| p\.eertu\.be| p\.lu| pastafriday\.club| patriottube\.sonsofliberty\.red| pcbu\.nl| peer\.azurs\.fr| peer\.d0g4\.me| peer\.lukeog\.com| peer\.madiator\.cloud| peer\.raise-uav\.com| peershare\.togart\.de| peertube-blablalinux\.be| peertube-demo\.learning-hub\.fr| peertube-docker\.cpy\.re| peertube-eu\.howlround\.com| peertube-u5014\.vm\.elestio\.app| peertube-us\.howlround\.com| peertube\.020\.pl| peertube\.0x5e\.eu| peertube\.1984\.cz| peertube\.2i2l\.net| peertube\.adjutor\.xyz| peertube\.adresse\.data\.gouv\.fr| peertube\.alpharius\.io| peertube\.am-networks\.fr| peertube\.anduin\.net| peertube\.anti-logic\.com| peertube\.arch-linux\.cz| peertube\.art3mis\.de| peertube\.artsrn\.ualberta\.ca| peertube\.askan\.info| peertube\.astral0pitek\.synology\.me| peertube\.atsuchan\.page| peertube\.automat\.click| peertube\.b38\.rural-it\.org| peertube\.be| peertube\.beeldengeluid\.nl| peertube\.bgzashtita\.es| peertube\.bike| peertube\.bildung-ekhn\.de| peertube\.biz| peertube\.br0\.fr| peertube\.bridaahost\.ynh\.fr| peertube\.bubbletea\.dev| peertube\.bubuit\.net| peertube\.cabaal\.net| peertube\.chatinbit\.com| peertube\.chaunchy\.com| peertube\.chir\.rs| peertube\.christianpacaud\.com| peertube\.chtisurel\.net| 
peertube\.chuggybumba\.com| peertube\.cipherbliss\.com| peertube\.cirkau\.art| peertube\.cloud\.nerdraum\.de| peertube\.cloud\.sans\.pub| peertube\.coko\.foundation| peertube\.communecter\.org| peertube\.concordia\.social| peertube\.corrigan\.xyz| peertube\.cpge-brizeux\.fr| peertube\.ctseuro\.com| peertube\.cuatrolibertades\.org| peertube\.cube4fun\.net| peertube\.dair-institute\.org| peertube\.davigge\.com| peertube\.dc\.pini\.fr| peertube\.deadtom\.me| peertube\.debian\.social| peertube\.delta0189\.xyz| peertube\.demonix\.fr| peertube\.designersethiques\.org| peertube\.desmu\.fr| peertube\.devol\.it| peertube\.dk| peertube\.doesstuff\.social| peertube\.eb8\.org| peertube\.education-forum\.com| peertube\.elforcer\.ru| peertube\.em\.id\.lv| peertube\.ethibox\.fr| peertube\.eu\.org| peertube\.european-pirates\.eu| peertube\.eus| peertube\.euskarabildua\.eus| peertube\.expi\.studio| peertube\.familie-berner\.de| peertube\.familleboisteau\.fr| peertube\.fedihost\.website| peertube\.fenarinarsa\.com| peertube\.festnoz\.de| peertube\.forteza\.fr| peertube\.freestorm\.online| peertube\.functional\.cafe| peertube\.gaminglinux\.fr| peertube\.gargantia\.fr| peertube\.geekgalaxy\.fr| peertube\.gemlog\.ca| peertube\.genma\.fr| peertube\.get-racing\.de| peertube\.ghis94\.ovh| peertube\.gidikroon\.eu| peertube\.giftedmc\.com| peertube\.grosist\.fr| peertube\.gruntwerk\.org| peertube\.gsugambit\.com| peertube\.hackerfoo\.com| peertube\.hellsite\.net| peertube\.helvetet\.eu| peertube\.histoirescrepues\.fr| peertube\.home\.x0r\.fr| peertube\.hyperfreedom\.org| peertube\.ichigo\.everydayimshuflin\.com| peertube\.ifwo\.eu| peertube\.in\.ua| peertube\.inapurna\.org| peertube\.informaction\.info| peertube\.interhop\.org| peertube\.it| peertube\.it-arts\.net| peertube\.jensdiemer\.de| peertube\.johntheserg\.al| peertube\.kaleidos\.net| peertube\.kalua\.im| peertube\.kcore\.org| peertube\.keazilla\.net| peertube\.klaewyss\.fr| peertube\.kleph\.eu| peertube\.kodein\.be| peertube\.kooperatywa\.tech| peertube\.kriom\.net| peertube\.kx\.studio| peertube\.kyriog\.eu| peertube\.la-famille-muller\.fr| peertube\.labeuropereunion\.eu| peertube\.lagvoid\.com| peertube\.lhc\.net\.br| peertube\.libresolutions\.network| peertube\.libretic\.fr| peertube\.librosphere\.fr| peertube\.logilab\.fr| peertube\.lon\.tv| peertube\.louisematic\.site| peertube\.luckow\.org| peertube\.luga\.at| peertube\.lyceeconnecte\.fr| peertube\.madixam\.xyz| peertube\.magicstone\.dev| peertube\.marienschule\.de| peertube\.marud\.fr| peertube\.maxweiss\.io| peertube\.miguelcr\.me| peertube\.mikemestnik\.net| peertube\.mobilsicher\.de| peertube\.monlycee\.net| peertube\.mxinfo\.fr| peertube\.naln1\.ca| peertube\.netzbegruenung\.de| peertube\.nicolastissot\.fr| peertube\.nogafam\.fr| peertube\.normalgamingcommunity\.cz| peertube\.nz| peertube\.offerman\.com| peertube\.ohioskates\.com| peertube\.onionstorm\.net| peertube\.opencloud\.lu| peertube\.otakufarms\.com| peertube\.paladyn\.org| peertube\.pix-n-chill\.fr| peertube\.r2\.enst\.fr| peertube\.r5c3\.fr| peertube\.redpill-insight\.com| peertube\.researchinstitute\.at| peertube\.revelin\.fr| peertube\.rlp\.schule| peertube\.rokugan\.fr| peertube\.rougevertbleu\.tv| peertube\.roundpond\.net| peertube\.rural-it\.org| peertube\.satoshishop\.de| peertube\.scyldings\.com| peertube\.securitymadein\.lu| peertube\.semperpax\.com| peertube\.semweb\.pro| peertube\.sensin\.eu| peertube\.sidh\.bzh| peertube\.skorpil\.cz| peertube\.smertrios\.com| peertube\.sqweeb\.net| peertube\.stattzeitung\.org| 
peertube\.stream| peertube\.su| peertube\.swrs\.net| peertube\.takeko\.cyou| peertube\.taxinachtegel\.de| peertube\.teftera\.com| peertube\.teutronic-services\.de| peertube\.ti-fr\.com| peertube\.tiennot\.net| peertube\.tmp\.rcp\.tf| peertube\.tspu\.edu\.ru| peertube\.tv| peertube\.tweb\.tv| peertube\.underworld\.fr| peertube\.vapronva\.pw| peertube\.veen\.world| peertube\.vesdia\.eu| peertube\.virtual-assembly\.org| peertube\.viviers-fibre\.net| peertube\.vlaki\.cz| peertube\.wiesbaden\.social| peertube\.wivodaim\.net| peertube\.wtf| peertube\.wtfayla\.net| peertube\.xrcb\.cat| peertube\.xwiki\.com| peertube\.zd\.do| peertube\.zetamc\.net| peertube\.zmuuf\.org| peertube\.zoz-serv\.org| peertube\.zwindler\.fr| peervideo\.ru| periscope\.numenaute\.org| pete\.warpnine\.de| petitlutinartube\.fr| phijkchu\.com| phoenixproject\.group| piraten\.space| pirtube\.calut\.fr| pityu\.flaki\.hu| play\.mittdata\.se| player\.ojamajo\.moe| podlibre\.video| portal\.digilab\.nfa\.cz| private\.fedimovie\.com| pt01\.lehrerfortbildung-bw\.de| pt\.diaspodon\.fr| pt\.freedomwolf\.cc| pt\.gordons\.gen\.nz| pt\.ilyamikcoder\.com| pt\.irnok\.net| pt\.mezzo\.moe| pt\.na4\.eu| pt\.netcraft\.ch| pt\.rwx\.ch| pt\.sfunk1x\.com| pt\.thishorsie\.rocks| pt\.vern\.cc| ptb\.lunarviews\.net| ptube\.de| ptube\.ranranhome\.info| puffy\.tube| puppet\.zone| qtube\.qlyoung\.net| quantube\.win| rankett\.net| replay\.jres\.org| review\.peertube\.biz| sdmtube\.fr| secure\.direct-live\.net| secure\.scanovid\.com| seka\.pona\.la| serv3\.wiki-tube\.de| skeptube\.fr| social\.fedimovie\.com| socpeertube\.ru| sovran\.video| special\.videovortex\.tv| spectra\.video| stl1988\.peertube-host\.de| stream\.biovisata\.lt| stream\.conesphere\.cloud| stream\.elven\.pw| stream\.jurnalfm\.md| stream\.k-prod\.fr| stream\.litera\.tools| stream\.nuemedia\.se| stream\.rlp-media\.de| stream\.vrse\.be| studios\.racer159\.com| styxhexenhammer666\.com| syrteplay\.obspm\.fr| t\.0x0\.st| tbh\.co-shaoghal\.net| test-fab\.ynh\.fr| testube\.distrilab\.fr| tgi\.hosted\.spacebear\.ee| theater\.ethernia\.net| thecool\.tube| thevideoverse\.com| tilvids\.com| tinkerbetter\.tube| tinsley\.video| trailers\.ddigest\.com| tube-action-educative\.apps\.education\.fr| tube-arts-lettres-sciences-humaines\.apps\.education\.fr| tube-cycle-2\.apps\.education\.fr| tube-cycle-3\.apps\.education\.fr| tube-education-physique-et-sportive\.apps\.education\.fr| tube-enseignement-professionnel\.apps\.education\.fr| tube-institutionnel\.apps\.education\.fr| tube-langues-vivantes\.apps\.education\.fr| tube-maternelle\.apps\.education\.fr| tube-numerique-educatif\.apps\.education\.fr| tube-sciences-technologies\.apps\.education\.fr| tube-test\.apps\.education\.fr| tube1\.perron-service\.de| tube\.9minuti\.it| tube\.abolivier\.bzh| tube\.alado\.space| tube\.amic37\.fr| tube\.area404\.cloud| tube\.arthack\.nz| tube\.asulia\.fr| tube\.awkward\.company| tube\.azbyka\.ru| tube\.azkware\.net| tube\.bartrip\.me\.uk| tube\.belowtoxic\.media| tube\.bingle\.plus| tube\.bit-friends\.de| tube\.bstly\.de| tube\.chosto\.me| tube\.cms\.garden| tube\.communia\.org| tube\.cyberia\.club| tube\.cybershock\.life| tube\.dembased\.xyz| tube\.dev\.displ\.eu| tube\.digitalesozialearbeit\.de| tube\.distrilab\.fr| tube\.doortofreedom\.org| tube\.dsocialize\.net| tube\.e-jeremy\.com| tube\.ebin\.club| tube\.elemac\.fr| tube\.erzbistum-hamburg\.de| tube\.exozy\.me| tube\.fdn\.fr| tube\.fedi\.quebec| tube\.fediverse\.at| tube\.felinn\.org| tube\.flokinet\.is| tube\.foad\.me\.uk| tube\.freepeople\.fr| 
tube\.friloux\.me| tube\.froth\.zone| tube\.fulda\.social| tube\.futuretic\.fr| tube\.g1zm0\.de| tube\.g4rf\.net| tube\.gaiac\.io| tube\.geekyboo\.net| tube\.genb\.de| tube\.ghk-academy\.info| tube\.gi-it\.de| tube\.grap\.coop| tube\.graz\.social| tube\.grin\.hu| tube\.hokai\.lol| tube\.int5\.net| tube\.interhacker\.space| tube\.invisible\.ch| tube\.io18\.top| tube\.itsg\.host| tube\.jeena\.net| tube\.kh-berlin\.de| tube\.kockatoo\.org| tube\.kotur\.org| tube\.koweb\.fr| tube\.la-dina\.net| tube\.lab\.nrw| tube\.lacaveatonton\.ovh| tube\.laurent-malys\.fr| tube\.leetdreams\.ch| tube\.linkse\.media| tube\.lokad\.com| tube\.lucie-philou\.com| tube\.media-techport\.de| tube\.morozoff\.pro| tube\.neshweb\.net| tube\.nestor\.coop| tube\.network\.europa\.eu| tube\.nicfab\.eu| tube\.nieuwwestbrabant\.nl| tube\.nogafa\.org| tube\.novg\.net| tube\.nox-rhea\.org| tube\.nuagelibre\.fr| tube\.numerique\.gouv\.fr| tube\.nuxnik\.com| tube\.nx12\.net| tube\.octaplex\.net| tube\.oisux\.org| tube\.okcinfo\.news| tube\.onlinekirche\.net| tube\.opportunis\.me| tube\.oraclefilms\.com| tube\.org\.il| tube\.pacapime\.ovh| tube\.parinux\.org| tube\.pastwind\.top| tube\.picasoft\.net| tube\.pilgerweg-21\.de| tube\.pmj\.rocks| tube\.pol\.social| tube\.ponsonaille\.fr| tube\.portes-imaginaire\.org| tube\.public\.apolut\.net| tube\.pustule\.org| tube\.pyngu\.com| tube\.querdenken-711\.de| tube\.rebellion\.global| tube\.reseau-canope\.fr| tube\.rhythms-of-resistance\.org| tube\.risedsky\.ovh| tube\.rooty\.fr| tube\.rsi\.cnr\.it| tube\.ryne\.moe| tube\.schleuss\.online| tube\.schule\.social| tube\.sekretaerbaer\.net| tube\.shanti\.cafe| tube\.shela\.nu| tube\.skrep\.in| tube\.sleeping\.town| tube\.sp-codes\.de| tube\.spdns\.org| tube\.systerserver\.net| tube\.systest\.eu| tube\.tappret\.fr| tube\.techeasy\.org| tube\.thierrytalbert\.fr| tube\.tinfoil-hat\.net| tube\.toldi\.eu| tube\.tpshd\.de| tube\.trax\.im| tube\.troopers\.agency| tube\.ttk\.is| tube\.tuxfriend\.fr| tube\.tylerdavis\.xyz| tube\.ullihome\.de| tube\.ulne\.be| tube\.undernet\.uy| tube\.vrpnet\.org| tube\.wolfe\.casa| tube\.xd0\.de| tube\.xn--baw-joa\.social| tube\.xy-space\.de| tube\.yapbreak\.fr| tubedu\.org| tubulus\.openlatin\.org| turtleisland\.video| tututu\.tube| tv\.adast\.dk| tv\.adn\.life| tv\.arns\.lt| tv\.atmx\.ca| tv\.based\.quest| tv\.farewellutopia\.com| tv\.filmfreedom\.net| tv\.gravitons\.org| tv\.io\.seg\.br| tv\.lumbung\.space| tv\.pirateradio\.social| tv\.pirati\.cz| tv\.santic-zombie\.ru| tv\.undersco\.re| tv\.zonepl\.net| tvox\.ru| twctube\.twc-zone\.eu| twobeek\.com| urbanists\.video| v\.9tail\.net| v\.basspistol\.org| v\.j4\.lc| v\.kisombrella\.top| v\.koa\.im| v\.kyaru\.xyz| v\.lor\.sh| v\.mkp\.ca| v\.posm\.gay| v\.slaycer\.top| veedeo\.org| vhs\.absturztau\.be| vid\.cthos\.dev| vid\.kinuseka\.us| vid\.mkp\.ca| vid\.nocogabriel\.fr| vid\.norbipeti\.eu| vid\.northbound\.online| vid\.ohboii\.de| vid\.plantplotting\.co\.uk| vid\.pretok\.tv| vid\.prometheus\.systems| vid\.soafen\.love| vid\.twhtv\.club| vid\.wildeboer\.net| video-cave-v2\.de| video-liberty\.com| video\.076\.ne\.jp| video\.1146\.nohost\.me| video\.9wd\.eu| video\.abraum\.de| video\.ados\.accoord\.fr| video\.amiga-ng\.org| video\.anartist\.org| video\.asgardius\.company| video\.audiovisuel-participatif\.org| video\.bards\.online| video\.barkoczy\.social| video\.benetou\.fr| video\.beyondwatts\.social| video\.bgeneric\.net| video\.bilecik\.edu\.tr| video\.blast-info\.fr| video\.bmu\.cloud| video\.catgirl\.biz| video\.causa-arcana\.com| video\.chasmcity\.net| 
video\.chbmeyer\.de| video\.cigliola\.com| video\.citizen4\.eu| video\.clumsy\.computer| video\.cnnumerique\.fr| video\.cnr\.it| video\.cnt\.social| video\.coales\.co| video\.comune\.trento\.it| video\.coyp\.us| video\.csc49\.fr| video\.davduf\.net| video\.davejansen\.com| video\.dlearning\.nl| video\.dnfi\.no| video\.dresden\.network| video\.drgnz\.club| video\.dudenas\.lt| video\.eientei\.org| video\.ellijaymakerspace\.org| video\.emergeheart\.info| video\.eradicatinglove\.xyz| video\.everythingbagel\.me| video\.extremelycorporate\.ca| video\.fabiomanganiello\.com| video\.fedi\.bzh| video\.fhtagn\.org| video\.firehawk-systems\.com| video\.fox-romka\.ru| video\.fuss\.bz\.it| video\.glassbeadcollective\.org| video\.graine-pdl\.org| video\.gyt\.is| video\.hainry\.fr| video\.hardlimit\.com| video\.hostux\.net| video\.igem\.org| video\.infojournal\.fr| video\.internet-czas-dzialac\.pl| video\.interru\.io| video\.ipng\.ch| video\.ironsysadmin\.com| video\.islameye\.com| video\.jacen\.moe| video\.jadin\.me| video\.jeffmcbride\.net| video\.jigmedatse\.com| video\.kuba-orlik\.name| video\.lacalligramme\.fr| video\.lanceurs-alerte\.fr| video\.laotra\.red| video\.lapineige\.fr| video\.laraffinerie\.re| video\.lavolte\.net| video\.liberta\.vip| video\.libreti\.net| video\.licentia\.net| video\.linc\.systems| video\.linux\.it| video\.linuxtrent\.it| video\.liveitlive\.show| video\.lono\.space| video\.lrose\.de| video\.lunago\.net| video\.lundi\.am| video\.lycee-experimental\.org| video\.maechler\.cloud| video\.marcorennmaus\.de| video\.mass-trespass\.uk| video\.matomocamp\.org| video\.medienzentrum-harburg\.de| video\.mentality\.rip| video\.metaversum\.wtf| video\.midreality\.com| video\.mttv\.it| video\.mugoreve\.fr| video\.mxtthxw\.art| video\.mycrowd\.ca| video\.niboe\.info| video\.nogafam\.es| video\.nstr\.no| video\.occm\.cc| video\.off-investigation\.fr| video\.olos311\.org| video\.ordinobsolete\.fr| video\.osvoj\.ru| video\.ourcommon\.cloud| video\.ozgurkon\.org| video\.pcf\.fr| video\.pcgaldo\.com| video\.phyrone\.de| video\.poul\.org| video\.publicspaces\.net| video\.pullopen\.xyz| video\.r3s\.nrw| video\.rainevixen\.com| video\.resolutions\.it| video\.retroedge\.tech| video\.rhizome\.org| video\.rlp-media\.de| video\.rs-einrich\.de| video\.rubdos\.be| video\.sadmin\.io| video\.sftblw\.moe| video\.shitposter\.club| video\.simplex-software\.ru| video\.slipfox\.xyz| video\.snug\.moe| video\.software-fuer-engagierte\.de| video\.soi\.ch| video\.sonet\.ws| video\.surazal\.net| video\.taskcards\.eu| video\.team-lcbs\.eu| video\.techforgood\.social| video\.telemillevaches\.net| video\.thepolarbear\.co\.uk| video\.thinkof\.name| video\.tii\.space| video\.tkz\.es| video\.trankil\.info| video\.triplea\.fr| video\.tum\.social| video\.turbo\.chat| video\.uriopss-pdl\.fr| video\.ustim\.ru| video\.ut0pia\.org| video\.vaku\.org\.ua| video\.vegafjord\.me| video\.veloma\.org| video\.violoncello\.ch| video\.voidconspiracy\.band| video\.wakkeren\.nl| video\.windfluechter\.org| video\.ziez\.eu| videos-passages\.huma-num\.fr| videos\.aadtp\.be| videos\.ahp-numerique\.fr| videos\.alamaisondulibre\.org| videos\.archigny\.net| videos\.aroaduntraveled\.com| videos\.b4tech\.org| videos\.benjaminbrady\.ie| videos\.bik\.opencloud\.lu| videos\.cloudron\.io| videos\.codingotaku\.com| videos\.coletivos\.org| videos\.collate\.social| videos\.danksquad\.org| videos\.digitaldragons\.eu| videos\.dromeadhere\.fr| videos\.explain-it\.org| videos\.factsonthegroundshow\.com| videos\.foilen\.com| videos\.fsci\.in| 
videos\.gamercast\.net| videos\.gianmarco\.gg| videos\.globenet\.org| videos\.grafo\.zone| videos\.hauspie\.fr| videos\.hush\.is| videos\.hyphalfusion\.network| videos\.icum\.to| videos\.im\.allmendenetz\.de| videos\.jacksonchen666\.com| videos\.john-livingston\.fr| videos\.knazarov\.com| videos\.kuoushi\.com| videos\.laliguepaysdelaloire\.org| videos\.lemouvementassociatif-pdl\.org| videos\.leslionsfloorball\.fr| videos\.librescrum\.org| videos\.mastodont\.cat| videos\.metus\.ca| videos\.miolo\.org| videos\.offroad\.town| videos\.openmandriva\.org| videos\.parleur\.net| videos\.pcorp\.us| videos\.pop\.eu\.com| videos\.rampin\.org| videos\.rauten\.co\.za| videos\.ritimo\.org| videos\.sarcasmstardust\.com| videos\.scanlines\.xyz| videos\.shmalls\.pw| videos\.stadtfabrikanten\.org| videos\.supertuxkart\.net| videos\.testimonia\.org| videos\.thinkerview\.com| videos\.torrenezzi10\.xyz| videos\.trom\.tf| videos\.utsukta\.org| videos\.viorsan\.com| videos\.wherelinux\.xyz| videos\.wikilibriste\.fr| videos\.yesil\.club| videos\.yeswiki\.net| videotube\.duckdns\.org| vids\.capypara\.de| vids\.roshless\.me| vids\.stary\.pc\.pl| vids\.tekdmn\.me| vidz\.julien\.ovh| views\.southfox\.me| virtual-girls-are\.definitely-for\.me| viste\.pt| vnchich\.com| vnop\.org| vod\.newellijay\.tv| voluntarytube\.com| vtr\.chikichiki\.tube| vulgarisation-informatique\.fr| watch\.easya\.solutions| watch\.goodluckgabe\.life| watch\.ignorance\.eu| watch\.jimmydore\.com| watch\.libertaria\.space| watch\.nuked\.social| watch\.ocaml\.org| watch\.thelema\.social| watch\.tubelab\.video| web-fellow\.de| webtv\.vandoeuvre\.net| wetubevid\.online| wikileaks\.video| wiwi\.video| wow\.such\.disappointment\.fail| www\.jvideos\.net| www\.kotikoff\.net| www\.makertube\.net| www\.mypeer\.tube| www\.nadajemy\.com| www\.neptube\.io| www\.rocaguinarda\.tv| www\.vnshow\.net| xxivproduction\.video| yt\.orokoro\.ru| ytube\.retronerd\.at| zumvideo\.de| # from youtube-dl peertube\.rainbowswingers\.net| tube\.stanisic\.nl| peer\.suiri\.us| medias\.libox\.fr| videomensoif\.ynh\.fr| peertube\.travelpandas\.eu| peertube\.rachetjay\.fr| peertube\.montecsys\.fr| tube\.eskuero\.me| peer\.tube| peertube\.umeahackerspace\.se| tube\.nx-pod\.de| video\.monsieurbidouille\.fr| tube\.openalgeria\.org| vid\.lelux\.fi| video\.anormallostpod\.ovh| tube\.crapaud-fou\.org| peertube\.stemy\.me| lostpod\.space| exode\.me| peertube\.snargol\.com| vis\.ion\.ovh| videosdulib\.re| v\.mbius\.io| videos\.judrey\.eu| peertube\.osureplayviewer\.xyz| peertube\.mathieufamily\.ovh| www\.videos-libr\.es| fightforinfo\.com| peertube\.fediverse\.ru| peertube\.oiseauroch\.fr| video\.nesven\.eu| v\.bearvideo\.win| video\.qoto\.org| justporn\.cc| video\.vny\.fr| peervideo\.club| tube\.taker\.fr| peertube\.chantierlibre\.org| tube\.ipfixe\.info| tube\.kicou\.info| tube\.dodsorf\.as| videobit\.cc| video\.yukari\.moe| videos\.elbinario\.net| hkvideo\.live| pt\.tux\.tf| www\.hkvideo\.live| FIGHTFORINFO\.com| pt\.765racing\.com| peertube\.gnumeria\.eu\.org| nordenmedia\.com| peertube\.co\.uk| tube\.darfweb\.eu| tube\.kalah-france\.org| 0ch\.in| vod\.mochi\.academy| film\.node9\.org| peertube\.hatthieves\.es| video\.fitchfamily\.org| peertube\.ddns\.net| video\.ifuncle\.kr| video\.fdlibre\.eu| tube\.22decembre\.eu| peertube\.harmoniescreatives\.com| tube\.fabrigli\.fr| video\.thedwyers\.co| video\.bruitbruit\.com| peertube\.foxfam\.club| peer\.philoxweb\.be| videos\.bugs\.social| peertube\.malbert\.xyz| peertube\.bilange\.ca| libretube\.net| diytelevision\.com| peertube\.fedilab\.app| 
libre\.video| video\.mstddntfdn\.online| us\.tv| peertube\.sl-network\.fr| peertube\.dynlinux\.io| peertube\.david\.durieux\.family| peertube\.linuxrocks\.online| peerwatch\.xyz| v\.kretschmann\.social| tube\.otter\.sh| yt\.is\.nota\.live| tube\.dragonpsi\.xyz| peertube\.boneheadmedia\.com| videos\.funkwhale\.audio| watch\.44con\.com| peertube\.gcaillaut\.fr| peertube\.icu| pony\.tube| spacepub\.space| tube\.stbr\.io| v\.mom-gay\.faith| tube\.port0\.xyz| peertube\.simounet\.net| play\.jergefelt\.se| peertube\.zeteo\.me| tube\.danq\.me| peertube\.kerenon\.com| tube\.fab-l3\.org| tube\.calculate\.social| peertube\.mckillop\.org| tube\.netzspielplatz\.de| vod\.ksite\.de| peertube\.laas\.fr| tube\.govital\.net| peertube\.stephenson\.cc| bistule\.nohost\.me| peertube\.kajalinifi\.de| video\.ploud\.jp| video\.omniatv\.com| peertube\.ffs2play\.fr| peertube\.leboulaire\.ovh| peertube\.tronic-studio\.com| peertube\.public\.cat| peertube\.metalbanana\.net| video\.1000i100\.fr| peertube\.alter-nativ-voll\.de| tube\.pasa\.tf| tube\.worldofhauru\.xyz| pt\.kamp\.site| peertube\.teleassist\.fr| videos\.mleduc\.xyz| conf\.tube| media\.privacyinternational\.org| pt\.forty-two\.nl| video\.halle-leaks\.de| video\.grosskopfgames\.de| peertube\.schaeferit\.de| peertube\.jackbot\.fr| tube\.extinctionrebellion\.fr| peertube\.f-si\.org| video\.subak\.ovh| videos\.koweb\.fr| peertube\.zergy\.net| peertube\.roflcopter\.fr| peertube\.floss-marketing-school\.com| vloggers\.social| peertube\.iriseden\.eu| videos\.ubuntu-paris\.org| peertube\.mastodon\.host| armstube\.com| peertube\.s2s\.video| peertube\.lol| tube\.open-plug\.eu| open\.tube| peertube\.ch| peertube\.normandie-libre\.fr| peertube\.slat\.org| video\.lacaveatonton\.ovh| peertube\.uno| peertube\.servebeer\.com| peertube\.fedi\.quebec| tube\.h3z\.jp| tube\.plus200\.com| peertube\.eric\.ovh| tube\.metadocs\.cc| tube\.unmondemeilleur\.eu| gouttedeau\.space| video\.antirep\.net| nrop\.cant\.at| tube\.ksl-bmx\.de| tube\.plaf\.fr| tube\.tchncs\.de| video\.devinberg\.com| hitchtube\.fr| peertube\.kosebamse\.com| yunopeertube\.myddns\.me| peertube\.varney\.fr| peertube\.anon-kenkai\.com| tube\.maiti\.info| tubee\.fr| videos\.dinofly\.com| toobnix\.org| videotape\.me| voca\.tube| video\.heromuster\.com| video\.lemediatv\.fr| video\.up\.edu\.ph| balafon\.video| video\.ivel\.fr| thickrips\.cloud| pt\.laurentkruger\.fr| video\.monarch-pass\.net| peertube\.artica\.center| video\.alternanet\.fr| indymotion\.fr| fanvid\.stopthatimp\.net| video\.farci\.org| v\.lesterpig\.com| video\.okaris\.de| tube\.pawelko\.net| peertube\.mablr\.org| tube\.fede\.re| pytu\.be| evertron\.tv| devtube\.dev-wiki\.de| raptube\.antipub\.org| video\.selea\.se| peertube\.mygaia\.org| video\.oh14\.de| peertube\.livingutopia\.org| peertube\.the-penguin\.de| tube\.thechangebook\.org| tube\.anjara\.eu| pt\.pube\.tk| video\.samedi\.pm| mplayer\.demouliere\.eu| widemus\.de| peertube\.me| peertube\.zapashcanon\.fr| video\.latavernedejohnjohn\.fr| peertube\.pcservice46\.fr| peertube\.mazzonetto\.eu| video\.irem\.univ-paris-diderot\.fr| video\.livecchi\.cloud| alttube\.fr| video\.coop\.tools| video\.cabane-libre\.org| peertube\.openstreetmap\.fr| videos\.alolise\.org| irrsinn\.video| video\.antopie\.org| scitech\.video| tube2\.nemsia\.org| video\.amic37\.fr| peertube\.freeforge\.eu| video\.arbitrarion\.com| video\.datsemultimedia\.com| stoptrackingus\.tv| peertube\.ricostrongxxx\.com| docker\.videos\.lecygnenoir\.info| peertube\.togart\.de| tube\.postblue\.info| videos\.domainepublic\.net| 
peertube\.cyber-tribal\.com| video\.gresille\.org| peertube\.dsmouse\.net| cinema\.yunohost\.support| tube\.theocevaer\.fr| repro\.video| tube\.4aem\.com| quaziinc\.com| peertube\.metawurst\.space| videos\.wakapo\.com| video\.ploud\.fr| video\.freeradical\.zone| tube\.valinor\.fr| refuznik\.video| pt\.kircheneuenburg\.de| peertube\.asrun\.eu| peertube\.lagob\.fr| videos\.side-ways\.net| 91video\.online| video\.valme\.io| video\.taboulisme\.com| videos-libr\.es| tv\.mooh\.fr| nuage\.acostey\.fr| video\.monsieur-a\.fr| peertube\.librelois\.fr| videos\.pair2jeux\.tube| videos\.pueseso\.club| peer\.mathdacloud\.ovh| media\.assassinate-you\.net| vidcommons\.org| ptube\.rousset\.nom\.fr| tube\.cyano\.at| videos\.squat\.net| video\.iphodase\.fr| peertube\.makotoworkshop\.org| peertube\.serveur\.slv-valbonne\.fr| vault\.mle\.party| hostyour\.tv| videos\.hack2g2\.fr| libre\.tube| pire\.artisanlogiciel\.net| videos\.numerique-en-commun\.fr| video\.netsyms\.com| video\.die-partei\.social| video\.writeas\.org| peertube\.swarm\.solvingmaz\.es| tube\.pericoloso\.ovh| watching\.cypherpunk\.observer| videos\.adhocmusic\.com| tube\.rfc1149\.net| peertube\.librelabucm\.org| videos\.numericoop\.fr| peertube\.koehn\.com| peertube\.anarchmusicall\.net| tube\.kampftoast\.de| vid\.y-y\.li| peertube\.xtenz\.xyz| diode\.zone| tube\.egf\.mn| peertube\.nomagic\.uk| visionon\.tv| videos\.koumoul\.com| video\.rastapuls\.com| video\.mantlepro\.com| video\.deadsuperhero\.com| peertube\.musicstudio\.pro| peertube\.we-keys\.fr| artitube\.artifaille\.fr| peertube\.ethernia\.net| tube\.midov\.pl| peertube\.fr| watch\.snoot\.tube| peertube\.donnadieu\.fr| argos\.aquilenet\.fr| tube\.nemsia\.org| tube\.bruniau\.net| videos\.darckoune\.moe| tube\.traydent\.info| dev\.videos\.lecygnenoir\.info| peertube\.nayya\.org| peertube\.live| peertube\.mofgao\.space| video\.lequerrec\.eu| peertube\.amicale\.net| aperi\.tube| tube\.ac-lyon\.fr| video\.lw1\.at| www\.yiny\.org| videos\.pofilo\.fr| tube\.lou\.lt| choob\.h\.etbus\.ch| tube\.hoga\.fr| peertube\.heberge\.fr| video\.obermui\.de| videos\.cloudfrancois\.fr| betamax\.video| video\.typica\.us| tube\.piweb\.be| video\.blender\.org| peertube\.cat| tube\.kdy\.ch| pe\.ertu\.be| peertube\.social| videos\.lescommuns\.org| tv\.datamol\.org| videonaute\.fr| dialup\.express| peertube\.nogafa\.org| megatube\.lilomoino\.fr| peertube\.tamanoir\.foucry\.net| peertube\.devosi\.org| peertube\.1312\.media| tube\.bootlicker\.party| skeptikon\.fr| video\.blueline\.mg| tube\.homecomputing\.fr| tube\.ouahpiti\.info| video\.tedomum\.net| video\.g3l\.org| fontube\.fr| peertube\.gaialabs\.ch| tube\.kher\.nl| peertube\.qtg\.fr| video\.migennes\.net| tube\.p2p\.legal| troll\.tv| videos\.iut-orsay\.fr| peertube\.solidev\.net| videos\.cemea\.org| video\.passageenseine\.fr| videos\.festivalparminous\.org| peertube\.touhoppai\.moe| sikke\.fi| peer\.hostux\.social| share\.tube| peertube\.walkingmountains\.fr| videos\.benpro\.fr| peertube\.parleur\.net| peertube\.heraut\.eu| tube\.aquilenet\.fr| peertube\.gegeweb\.eu| framatube\.org| thinkerview\.video| tube\.conferences-gesticulees\.net| peertube\.datagueule\.tv| video\.lqdn\.fr| tube\.mochi\.academy| media\.zat\.im| video\.colibris-outilslibres\.org| tube\.svnet\.fr| peertube\.video| peertube2\.cpy\.re| peertube3\.cpy\.re| videos\.tcit\.fr| peertube\.cpy\.re| canard\.tube )''' _UUID_RE = r'[\da-zA-Z]{22}|[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}' _API_BASE = 'https://%s/api/v1/videos/%s/%s' _VALID_URL = rf'''(?x) (?: 
                        peertube:(?P<host>[^:]+):|
                        https?://(?P<host_2>{_INSTANCES_RE})/(?:videos/(?:watch|embed)|api/v\d/videos|w)/
                    )
                    (?P<id>{_UUID_RE})
                    '''
    _EMBED_REGEX = [r'''(?x)<iframe[^>]+\bsrc=["\'](?P<url>(?:https?:)?//{_INSTANCES_RE}/videos/embed/{cls._UUID_RE})''']
    _TESTS = [{
        'url': 'https://framatube.org/videos/watch/9c9de5e8-0a1e-484a-b099-e80766180a6d',
        'md5': '8563064d245a4be5705bddb22bb00a28',
        'info_dict': {
            'id': '9c9de5e8-0a1e-484a-b099-e80766180a6d',
            'ext': 'mp4',
            'title': 'What is PeerTube?',
            'description': 'md5:3fefb8dde2b189186ce0719fda6f7b10',
            'thumbnail': r're:https?://.*\.(?:jpg|png)',
            'timestamp': 1538391166,
            'upload_date': '20181001',
            'uploader': 'Framasoft',
            'uploader_id': '3',
            'uploader_url': 'https://framatube.org/accounts/framasoft',
            'channel': 'A propos de PeerTube',
            'channel_id': '2215',
            'channel_url': 'https://framatube.org/video-channels/joinpeertube',
            'language': 'en',
            'license': 'Attribution - Share Alike',
            'duration': 113,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'tags': ['framasoft', 'peertube'],
            'categories': ['Science & Technology'],
        },
    }, {
        'url': 'https://peertube2.cpy.re/w/122d093a-1ede-43bd-bd34-59d2931ffc5e',
        'info_dict': {
            'id': '122d093a-1ede-43bd-bd34-59d2931ffc5e',
            'ext': 'mp4',
            'title': 'E2E tests',
            'uploader_id': '37855',
            'timestamp': 1589276219,
            'upload_date': '20200512',
            'uploader': 'chocobozzz',
        },
    }, {
        'url': 'https://peertube2.cpy.re/w/3fbif9S3WmtTP8gGsC5HBd',
        'info_dict': {
            'id': '3fbif9S3WmtTP8gGsC5HBd',
            'ext': 'mp4',
            'title': 'E2E tests',
            'uploader_id': '37855',
            'timestamp': 1589276219,
            'upload_date': '20200512',
            'uploader': 'chocobozzz',
        },
    }, {
        'url': 'https://peertube2.cpy.re/api/v1/videos/3fbif9S3WmtTP8gGsC5HBd',
        'info_dict': {
            'id': '3fbif9S3WmtTP8gGsC5HBd',
            'ext': 'mp4',
            'title': 'E2E tests',
            'uploader_id': '37855',
            'timestamp': 1589276219,
            'upload_date': '20200512',
            'uploader': 'chocobozzz',
        },
    }, {
        # Issue #26002
        'url': 'peertube:spacepub.space:d8943b2d-8280-497b-85ec-bc282ec2afdc',
        'info_dict': {
            'id': 'd8943b2d-8280-497b-85ec-bc282ec2afdc',
            'ext': 'mp4',
            'title': 'Dot matrix printer shell demo',
            'uploader_id': '3',
            'timestamp': 1587401293,
            'upload_date': '20200420',
            'uploader': 'Drew DeVault',
        },
    }, {
        'url': 'https://peertube.debian.social/videos/watch/0b04f13d-1e18-4f1d-814e-4979aa7c9c44',
        'only_matching': True,
    }, {
        # nsfw
        'url': 'https://vod.ksite.de/videos/watch/9bb88cd3-9959-46d9-9ab9-33d2bb704c39',
        'only_matching': True,
    }, {
        'url': 'https://vod.ksite.de/videos/embed/fed67262-6edb-4d1c-833b-daa9085c71d7',
        'only_matching': True,
    }, {
        'url': 'https://peertube.tv/api/v1/videos/c1875674-97d0-4c94-a058-3f7e64c962e8',
        'only_matching': True,
    }, {
        'url': 'peertube:framatube.org:b37a5b9f-e6b5-415c-b700-04a5cd6ec205',
        'only_matching': True,
    }]

    @staticmethod
    def _extract_peertube_url(webpage, source_url):
        mobj = re.match(
            rf'https?://(?P<host>[^/]+)/(?:videos/(?:watch|embed)|w)/(?P<id>{PeerTubeIE._UUID_RE})', source_url)
        if mobj and any(p in webpage for p in (
                'meta property="og:platform" content="PeerTube"',
                'PeerTube<',
                'There will be other non JS-based clients to access PeerTube',
                '>We are sorry but it seems that PeerTube is not compatible with your web browser.<')):
            return 'peertube:{}:{}'.format(*mobj.group('host', 'id'))

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        embeds = tuple(super()._extract_embed_urls(url, webpage))
        if embeds:
            return embeds

        peertube_url = cls._extract_peertube_url(webpage, url)
        if peertube_url:
            return [peertube_url]

    def _call_api(self, host, video_id, path, note=None, errnote=None, fatal=True):
        return self._download_json(
            self._API_BASE % (host, video_id, path), video_id,
            note=note, errnote=errnote, fatal=fatal)

    def _get_subtitles(self, host, video_id):
        captions = self._call_api(
            host, video_id, 'captions', note='Downloading captions JSON',
            fatal=False)
        if not isinstance(captions, dict):
            return
        data = captions.get('data')
        if not isinstance(data, list):
            return
        subtitles = {}
        for e in data:
            language_id = try_get(e, lambda x: x['language']['id'], str)
            caption_url = urljoin(f'https://{host}', e.get('captionPath'))
            if not caption_url:
                continue
            subtitles.setdefault(language_id or 'en', []).append({
                'url': caption_url,
            })
        return subtitles

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        host = mobj.group('host') or mobj.group('host_2')
        video_id = mobj.group('id')

        video = self._call_api(
            host, video_id, '', note='Downloading video JSON')

        title = video['name']

        formats, is_live = [], False
        files = video.get('files') or []
        for playlist in (video.get('streamingPlaylists') or []):
            if not isinstance(playlist, dict):
                continue
            if playlist_url := url_or_none(playlist.get('playlistUrl')):
                is_live = True
                formats.extend(self._extract_m3u8_formats(
                    playlist_url, video_id, fatal=False, live=True))
            playlist_files = playlist.get('files')
            if not (playlist_files and isinstance(playlist_files, list)):
                continue
            files.extend(playlist_files)
        for file_ in files:
            if not isinstance(file_, dict):
                continue
            file_url = url_or_none(file_.get('fileUrl'))
            if not file_url:
                continue
            file_size = int_or_none(file_.get('size'))
            format_id = try_get(
                file_, lambda x: x['resolution']['label'], str)
            f = parse_resolution(format_id)
            f.update({
                'url': file_url,
                'format_id': format_id,
                'filesize': file_size,
            })
            if format_id == '0p':
                f['vcodec'] = 'none'
            else:
                f['fps'] = int_or_none(file_.get('fps'))
            is_live = False
            formats.append(f)

        description = video.get('description')
        if description and len(description) >= 250:
            # description is shortened
            full_description = self._call_api(
                host, video_id, 'description', note='Downloading description JSON',
                fatal=False)

            if isinstance(full_description, dict):
                description = str_or_none(full_description.get('description')) or description

        subtitles = self.extract_subtitles(host, video_id)

        def data(section, field, type_):
            return try_get(video, lambda x: x[section][field], type_)

        def account_data(field, type_):
            return data('account', field, type_)

        def channel_data(field, type_):
            return data('channel', field, type_)

        category = data('category', 'label', str)
        categories = [category] if category else None

        nsfw = video.get('nsfw')
        if isinstance(nsfw, bool):
            age_limit = 18 if nsfw else 0
        else:
            age_limit = None

        webpage_url = f'https://{host}/videos/watch/{video_id}'

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': urljoin(webpage_url, video.get('thumbnailPath')),
            'timestamp': unified_timestamp(video.get('publishedAt')),
            'uploader': account_data('displayName', str),
            'uploader_id': str_or_none(account_data('id', int)),
            'uploader_url': url_or_none(account_data('url', str)),
            'channel': channel_data('displayName', str),
            'channel_id': str_or_none(channel_data('id', int)),
            'channel_url': url_or_none(channel_data('url', str)),
            'language': data('language', 'id', str),
            'license': data('licence', 'label', str),
            'duration': int_or_none(video.get('duration')),
            'view_count': int_or_none(video.get('views')),
            'like_count': int_or_none(video.get('likes')),
            'dislike_count': int_or_none(video.get('dislikes')),
            'age_limit': age_limit,
            'tags': try_get(video, lambda x: x['tags'], list),
            'categories': categories,
'formats': formats, 'subtitles': subtitles, 'is_live': is_live, 'webpage_url': webpage_url, } class PeerTubePlaylistIE(InfoExtractor): IE_NAME = 'PeerTube:Playlist' _TYPES = { 'a': 'accounts', 'c': 'video-channels', 'w/p': 'video-playlists', } _VALID_URL = r'''(?x) https?://(?P<host>{})/(?P<type>(?:{}))/ (?P<id>[^/]+) '''.format(PeerTubeIE._INSTANCES_RE, '|'.join(_TYPES.keys())) _TESTS = [{ 'url': 'https://peertube.debian.social/w/p/hFdJoTuyhNJVa1cDWd1d12', 'info_dict': { 'id': 'hFdJoTuyhNJVa1cDWd1d12', 'description': 'Diversas palestras do Richard Stallman no Brasil.', 'title': 'Richard Stallman no Brasil', 'timestamp': 1599676222, }, 'playlist_mincount': 9, }, { 'url': 'https://peertube2.cpy.re/a/chocobozzz/videos', 'info_dict': { 'id': 'chocobozzz', 'timestamp': 1553874564, 'title': 'chocobozzz', }, 'playlist_mincount': 2, }, { 'url': 'https://framatube.org/c/bf54d359-cfad-4935-9d45-9d6be93f63e8/videos', 'info_dict': { 'id': 'bf54d359-cfad-4935-9d45-9d6be93f63e8', 'timestamp': 1519917377, 'title': 'Les vidéos de Framasoft', }, 'playlist_mincount': 345, }, { 'url': 'https://peertube2.cpy.re/c/blender_open_movies@video.blender.org/videos', 'info_dict': { 'id': 'blender_open_movies@video.blender.org', 'timestamp': 1542287810, 'title': 'Official Blender Open Movies', }, 'playlist_mincount': 11, }] _API_BASE = 'https://%s/api/v1/%s/%s%s' _PAGE_SIZE = 30 def call_api(self, host, name, path, base, **kwargs): return self._download_json( self._API_BASE % (host, base, name, path), name, **kwargs) def fetch_page(self, host, playlist_id, playlist_type, page): page += 1 video_data = self.call_api( host, playlist_id, f'/videos?sort=-createdAt&start={self._PAGE_SIZE * (page - 1)}&count={self._PAGE_SIZE}&nsfw=both', playlist_type, note=f'Downloading page {page}').get('data', []) for video in video_data: short_uuid = video.get('shortUUID') or try_get(video, lambda x: x['video']['shortUUID']) video_title = video.get('name') or try_get(video, lambda x: x['video']['name']) yield self.url_result( f'https://{host}/w/{short_uuid}', PeerTubeIE.ie_key(), video_id=short_uuid, video_title=video_title) def _extract_playlist(self, host, playlist_type, playlist_id): info = self.call_api(host, playlist_id, '', playlist_type, note='Downloading playlist information', fatal=False) playlist_title = info.get('displayName') playlist_description = info.get('description') playlist_timestamp = unified_timestamp(info.get('createdAt')) channel = try_get(info, lambda x: x['ownerAccount']['name']) or info.get('displayName') channel_id = try_get(info, lambda x: x['ownerAccount']['id']) or info.get('id') thumbnail = format_field(info, 'thumbnailPath', f'https://{host}%s') entries = OnDemandPagedList(functools.partial( self.fetch_page, host, playlist_id, playlist_type), self._PAGE_SIZE) return self.playlist_result( entries, playlist_id, playlist_title, playlist_description, timestamp=playlist_timestamp, channel=channel, channel_id=channel_id, thumbnail=thumbnail) def _real_extract(self, url): playlist_type, host, playlist_id = self._match_valid_url(url).group('type', 'host', 'id') return self._extract_playlist(host, self._TYPES[playlist_type], playlist_id) 
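

# Illustrative sketch (not part of the extractors above): how
# PeerTubePlaylistIE.fetch_page composes its paged API requests for
# OnDemandPagedList. OnDemandPagedList hands in a zero-based page index;
# fetch_page increments it first, then converts back to a zero-based `start`
# offset. The host and channel name below are hypothetical examples.
def _example_playlist_page_url(host, base, name, page, page_size=30):
    page += 1  # same arithmetic as PeerTubePlaylistIE.fetch_page
    start = page_size * (page - 1)
    return (f'https://{host}/api/v1/{base}/{name}'
            f'/videos?sort=-createdAt&start={start}&count={page_size}&nsfw=both')


# _example_playlist_page_url('example.org', 'video-channels', 'some-channel', 0)
# -> 'https://example.org/api/v1/video-channels/some-channel/videos?sort=-createdAt&start=0&count=30&nsfw=both'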
yt-dlp-2024.09.27/yt_dlp/extractor/peertv.py
from .common import InfoExtractor
from ..utils import js_to_json


class PeerTVIE(InfoExtractor):
    IE_NAME = 'peer.tv'
    _VALID_URL = r'https?://(?:www\.)?peer\.tv/(?:de|it|en)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.peer.tv/de/841',
        'info_dict': {
            'id': '841',
            'ext': 'mp4',
            'title': 'Die Brunnenburg',
            'description': 'md5:4395f6142b090338340ab88a3aae24ed',
        },
    }, {
        'url': 'https://www.peer.tv/it/404',
        'info_dict': {
            'id': '404',
            'ext': 'mp4',
            'title': 'Cascate di ghiaccio in Val Gardena',
            'description': 'md5:e8e5907f236171842674e8090e3577b8',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_key = self._html_search_regex(r'player\.peer\.tv/js/([a-zA-Z0-9]+)', webpage, 'video key')

        js = self._download_webpage(
            f'https://player.peer.tv/js/{video_key}/', video_id,
            headers={'Referer': 'https://www.peer.tv/'}, note='Downloading session id')

        session_id = self._search_regex(r'["\']session_id["\']:\s*["\']([a-zA-Z0-9]+)["\']', js, 'session id')

        player_webpage = self._download_webpage(
            f'https://player.peer.tv/jsc/{video_key}/{session_id}?jsr=aHR0cHM6Ly93d3cucGVlci50di9kZS84NDE=&cs=UTF-8&mq=2&ua=0&webm=p&mp4=p&hls=1',
            video_id, note='Downloading player webpage')

        m3u8_url = self._search_regex(r'["\']playlist_url["\']:\s*(["\'][^"\']+["\'])', player_webpage, 'm3u8 url')
        m3u8_url = self._parse_json(m3u8_url, video_id, transform_source=js_to_json)

        formats = self._extract_m3u8_formats(m3u8_url, video_id, m3u8_id='hls')

        return {
            'id': video_id,
            'title': self._html_search_regex(r'<h1>(.+?)</h1>', webpage, 'title').replace('\xa0', ' '),
            'formats': formats,
            'description': self._html_search_meta(('og:description', 'description'), webpage),
            'thumbnail': self._html_search_meta(('og:image', 'image'), webpage),
        }
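

# Sketch of why the playlist URL above is run through js_to_json before being
# parsed: the player page embeds it as a JavaScript string literal (commonly
# single-quoted), which json.loads() alone would reject. The sample literal
# below is a hypothetical example.
def _example_parse_js_string_literal():
    import json
    raw = "'https://example.invalid/stream/playlist.m3u8'"  # JS-style literal
    return json.loads(js_to_json(raw))  # -> 'https://example.invalid/stream/playlist.m3u8'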
yt-dlp-2024.09.27/yt_dlp/extractor/peloton.py
import json
import re
import urllib.parse

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    float_or_none,
    str_or_none,
    traverse_obj,
    url_or_none,
)


class PelotonIE(InfoExtractor):
    IE_NAME = 'peloton'
    _NETRC_MACHINE = 'peloton'
    _VALID_URL = r'https?://members\.onepeloton\.com/classes/player/(?P<id>[a-f0-9]+)'
    _TESTS = [{
        'url': 'https://members.onepeloton.com/classes/player/0e9653eb53544eeb881298c8d7a87b86',
        'info_dict': {
            'id': '0e9653eb53544eeb881298c8d7a87b86',
            'title': '20 min Chest & Back Strength',
            'ext': 'mp4',
            'thumbnail': r're:^https?://.+\.jpg',
            'description': 'md5:fcd5be9b9eda0194b470e13219050a66',
            'creator': 'Chase Tucker',
            'release_timestamp': 1556141400,
            'timestamp': 1556141400,
            'upload_date': '20190424',
            'duration': 1389,
            'categories': ['Strength'],
            'tags': ['Workout Mat', 'Light Weights', 'Medium Weights'],
            'is_live': False,
            'chapters': 'count:1',
            'subtitles': {'en': [{
                'url': r're:^https?://.+',
                'ext': 'vtt',
            }]},
        },
        'params': {
            'skip_download': 'm3u8',
        },
        'skip': 'Account needed',
    }, {
        'url': 'https://members.onepeloton.com/classes/player/26603d53d6bb4de1b340514864a6a6a8',
        'info_dict': {
            'id': '26603d53d6bb4de1b340514864a6a6a8',
            'title': '30 min Earth Day Run',
            'ext': 'm4a',
            'thumbnail': r're:https://.+\.jpg',
            'description': 'md5:adc065a073934d7ee0475d217afe0c3d',
            'creator': 'Selena Samuela',
            'release_timestamp': 1587567600,
            'timestamp': 1587567600,
            'upload_date': '20200422',
            'duration': 1802,
            'categories': ['Running'],
            'is_live': False,
            'chapters': 'count:3',
        },
        'params': {
            'skip_download': 'm3u8',
        },
        'skip': 'Account needed',
    }]

    _MANIFEST_URL_TEMPLATE = '%s?hdnea=%s'

    def _start_session(self, video_id):
        self._download_webpage('https://api.onepeloton.com/api/started_client_session', video_id, note='Starting session')

    def _login(self, video_id):
        username, password = self._get_login_info()
        if not (username and password):
            self.raise_login_required()
        try:
            self._download_json(
                'https://api.onepeloton.com/auth/login', video_id, note='Logging in',
                data=json.dumps({
                    'username_or_email': username,
                    'password': password,
                    'with_pubsub': False,
                }).encode(),
                headers={'Content-Type': 'application/json', 'User-Agent': 'web'})
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                json_string = self._webpage_read_content(e.cause.response, None, video_id)
                res = self._parse_json(json_string, video_id)
                raise ExtractorError(res['message'], expected=res['message'] == 'Login failed')
            else:
                raise

    def _get_token(self, video_id):
        try:
            subscription = self._download_json(
'https://api.onepeloton.com/api/subscription/stream', video_id, note='Downloading token', data=json.dumps({}).encode(), headers={'Content-Type': 'application/json'}) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 403: json_string = self._webpage_read_content(e.cause.response, None, video_id) res = self._parse_json(json_string, video_id) raise ExtractorError(res['message'], expected=res['message'] == 'Stream limit reached') else: raise return subscription['token'] def _real_extract(self, url): video_id = self._match_id(url) try: self._start_session(video_id) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 401: self._login(video_id) self._start_session(video_id) else: raise metadata = self._download_json(f'https://api.onepeloton.com/api/ride/{video_id}/details?stream_source=multichannel', video_id) ride_data = metadata.get('ride') if not ride_data: raise ExtractorError('Missing stream metadata') token = self._get_token(video_id) is_live = False if ride_data.get('content_format') == 'audio': url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('vod_stream_url'), urllib.parse.quote(token)) formats = [{ 'url': url, 'ext': 'm4a', 'format_id': 'audio', 'vcodec': 'none', }] subtitles = {} else: if ride_data.get('vod_stream_url'): url = 'https://members.onepeloton.com/.netlify/functions/m3u8-proxy?displayLanguage=en&acceptedSubtitles={}&url={}?hdnea={}'.format( ','.join([re.sub('^([a-z]+)-([A-Z]+)$', r'\1', caption) for caption in ride_data['captions']]), ride_data['vod_stream_url'], urllib.parse.quote(urllib.parse.quote(token))) elif ride_data.get('live_stream_url'): url = self._MANIFEST_URL_TEMPLATE % (ride_data.get('live_stream_url'), urllib.parse.quote(token)) is_live = True else: raise ExtractorError('Missing video URL') formats, subtitles = self._extract_m3u8_formats_and_subtitles(url, video_id, 'mp4') if metadata.get('instructor_cues'): subtitles['cues'] = [{ 'data': json.dumps(metadata.get('instructor_cues')), 'ext': 'json', }] category = ride_data.get('fitness_discipline_display_name') chapters = [{ 'start_time': segment.get('start_time_offset'), 'end_time': segment.get('start_time_offset') + segment.get('length'), 'title': segment.get('name'), } for segment in traverse_obj(metadata, ('segments', 'segment_list'))] return { 'id': video_id, 'title': ride_data.get('title'), 'formats': formats, 'thumbnail': url_or_none(ride_data.get('image_url')), 'description': str_or_none(ride_data.get('description')), 'creator': traverse_obj(ride_data, ('instructor', 'name')), 'release_timestamp': ride_data.get('original_air_time'), 'timestamp': ride_data.get('original_air_time'), 'subtitles': subtitles, 'duration': float_or_none(ride_data.get('length')), 'categories': [category] if category else None, 'tags': traverse_obj(ride_data, ('equipment_tags', ..., 'name')), 'is_live': is_live, 'chapters': chapters, } class PelotonLiveIE(InfoExtractor): IE_NAME = 'peloton:live' IE_DESC = 'Peloton Live' _VALID_URL = r'https?://members\.onepeloton\.com/player/live/(?P<id>[a-f0-9]+)' _TEST = { 'url': 'https://members.onepeloton.com/player/live/eedee2d19f804a9788f53aa8bd38eb1b', 'info_dict': { 'id': '32edc92d28044be5bf6c7b6f1f8d1cbc', 'title': '30 min HIIT Ride: Live from Home', 'ext': 'mp4', 'thumbnail': r're:^https?://.+\.png', 'description': 'md5:f0d7d8ed3f901b7ee3f62c1671c15817', 'creator': 'Alex Toussaint', 'release_timestamp': 1587736620, 'timestamp': 1587736620, 'upload_date': '20200424', 'duration': 2014, 'categories': ['Cycling'], 
            'is_live': False,
            'chapters': 'count:3',
        },
        'params': {
            'skip_download': 'm3u8',
        },
        'skip': 'Account needed',
    }

    def _real_extract(self, url):
        workout_id = self._match_id(url)
        peloton = self._download_json(f'https://api.onepeloton.com/api/peloton/{workout_id}', workout_id)

        if peloton.get('ride_id'):
            if not peloton.get('is_live') or peloton.get('is_encore') or peloton.get('status') != 'PRE_START':
                return self.url_result('https://members.onepeloton.com/classes/player/{}'.format(peloton['ride_id']))
            else:
                raise ExtractorError('Ride has not started', expected=True)
        else:
            raise ExtractorError('Missing video ID')
yt-dlp-2024.09.27/yt_dlp/extractor/performgroup.py
from .common import InfoExtractor
from ..utils import int_or_none, join_nonempty


class PerformGroupIE(InfoExtractor):
    _VALID_URL = r'https?://player\.performgroup\.com/eplayer(?:/eplayer\.html|\.js)#/?(?P<id>[0-9a-f]{26})\.(?P<auth_token>[0-9a-z]{26})'
    _TESTS = [{
        # http://www.faz.net/aktuell/sport/fussball/wm-2018-playoffs-schweiz-besiegt-nordirland-1-0-15286104.html
        'url': 'http://player.performgroup.com/eplayer/eplayer.html#d478c41c5d192f56b9aa859de8.1w4crrej5w14e1ed4s1ce4ykab',
        'md5': '259cb03d142e2e52471e8837ecacb29f',
        'info_dict': {
            'id': 'xgrwobuzumes1lwjxtcdpwgxd',
            'ext': 'mp4',
            'title': 'Liga MX: Keine Einsicht nach Horrorfoul',
            'description': 'md5:7cd3b459c82725b021e046ab10bf1c5b',
            'timestamp': 1511533477,
            'upload_date': '20171124',
        },
    }]

    def _call_api(self, service, auth_token, content_id, referer_url):
        return self._download_json(
            f'http://ep3.performfeeds.com/ep{service}/{auth_token}/{content_id}/',
            content_id, headers={
                'Referer': referer_url,
                'Origin': 'http://player.performgroup.com',
            }, query={
                '_fmt': 'json',
            })

    def _real_extract(self, url):
        player_id, auth_token = self._match_valid_url(url).groups()
        bootstrap = self._call_api('bootstrap', auth_token, player_id, url)
        video = bootstrap['config']['dataSource']['sourceItems'][0]['videos'][0]
        video_id = video['uuid']
        vod = self._call_api('vod', auth_token, video_id, url)
        media = vod['videos']['video'][0]['media']

        formats = []
        hls_url = media.get('hls', {}).get('url')
        if hls_url:
            formats.extend(self._extract_m3u8_formats(hls_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False))

        hds_url = media.get('hds', {}).get('url')
        if hds_url:
            formats.extend(self._extract_f4m_formats(hds_url + '?hdcore', video_id, f4m_id='hds', fatal=False))

        for c in media.get('content', []):
            c_url = c.get('url')
            if not c_url:
                continue
            tbr = int_or_none(c.get('bitrate'), 1000)
            formats.append({
                'format_id': join_nonempty('http', tbr),
                'url': c_url,
                'tbr': tbr,
                'width': int_or_none(c.get('width')),
                'height': int_or_none(c.get('height')),
                'filesize': int_or_none(c.get('fileSize')),
                'vcodec': c.get('type'),
                'fps': int_or_none(c.get('videoFrameRate')),
                'vbr': int_or_none(c.get('videoRate'), 1000),
                'abr': int_or_none(c.get('audioRate'), 1000),
            })

        return {
            'id': video_id,
            'title': video['title'],
            'description': video.get('description'),
            'thumbnail': video.get('poster'),
            'duration': int_or_none(video.get('duration')),
            'timestamp': int_or_none(video.get('publishedTime'), 1000),
            'formats': formats,
        }
yt-dlp-2024.09.27/yt_dlp/extractor/periscope.py
from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_iso8601,
    unescapeHTML,
)
from ..utils.traversal import traverse_obj


class PeriscopeBaseIE(InfoExtractor):
    _M3U8_HEADERS = {
        'Referer': 'https://www.periscope.tv/',
    }

    def _call_api(self, method, query, item_id):
        return self._download_json(
            f'https://api.periscope.tv/api/v2/{method}', item_id, query=query)

    def _parse_broadcast_data(self, broadcast, video_id):
        title = broadcast.get('status') or 'Periscope Broadcast'
        uploader = broadcast.get('user_display_name') or broadcast.get('username')
        title = f'{uploader} - {title}' if uploader else title
        thumbnails = [{
            'url': broadcast[image],
        } for image in ('image_url', 'image_url_medium', 'image_url_small') if broadcast.get(image)]

        return {
            'id': broadcast.get('id') or video_id,
            'title': title,
            'timestamp': parse_iso8601(broadcast.get('created_at')) or int_or_none(
                broadcast.get('created_at_ms'), scale=1000),
            'release_timestamp': int_or_none(broadcast.get('scheduled_start_ms'), scale=1000),
            'uploader': uploader,
            'uploader_id': broadcast.get('user_id') or broadcast.get('username'),
            'thumbnails': thumbnails,
            'view_count': int_or_none(broadcast.get('total_watched')),
            'concurrent_view_count': int_or_none(broadcast.get('total_watching')),
            'tags': broadcast.get('tags'),
            'live_status': {
                'running': 'is_live',
                'not_started': 'is_upcoming',
            }.get(traverse_obj(broadcast, ('state', {str.lower}))) or 'was_live',
        }

    @staticmethod
    def _extract_common_format_info(broadcast):
        return broadcast.get('state').lower(), int_or_none(broadcast.get('width')), int_or_none(broadcast.get('height'))

    @staticmethod
    def _add_width_and_height(f, width, height):
        for key, val in (('width', width), ('height', height)):
            if not f.get(key):
                f[key] = val

    def _extract_pscp_m3u8_formats(self, m3u8_url, video_id, format_id, state, width, height, fatal=True):
        m3u8_formats = self._extract_m3u8_formats(
            m3u8_url, video_id, 'mp4',
            entry_protocol='m3u8_native' if state in ('ended', 'timed_out') else 'm3u8',
            m3u8_id=format_id, fatal=fatal, headers=self._M3U8_HEADERS)
        if len(m3u8_formats) == 1:
            self._add_width_and_height(m3u8_formats[0], width, height)
        for f in m3u8_formats:
            f.setdefault('http_headers', {}).update(self._M3U8_HEADERS)
        return m3u8_formats


class PeriscopeIE(PeriscopeBaseIE):
    IE_DESC = 'Periscope'
    IE_NAME = 'periscope'
    _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/[^/]+/(?P<id>[^/?#]+)'
    _EMBED_REGEX = [r'<iframe[^>]+src=([\'"])(?P<url>(?:https?:)?//(?:www\.)?(?:periscope|pscp)\.tv/(?:(?!\1).)+)\1']
    # Alive example URLs can be found here https://www.periscope.tv/
    _TESTS = [{
        'url': 'https://www.periscope.tv/w/aJUQnjY3MjA3ODF8NTYxMDIyMDl2zCg2pECBgwTqRpQuQD352EMPTKQjT4uqlM3cgWFA-g==',
        'md5': '65b57957972e503fcbbaeed8f4fa04ca',
        'info_dict': {
            'id': '56102209',
            'ext': 'mp4',
            'title': 'Bec Boop - 🚠✈️🇬🇧 Fly above #London in Emirates Air Line cable car at night 🇬🇧✈️🚠 #BoopScope 🎀💗',
            'timestamp': 1438978559,
            'upload_date': '20150807',
            'uploader': 'Bec Boop',
            'uploader_id': '1465763',
        },
        'skip': 'Expires in 24 hours',
    }, {
        'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
        'only_matching': True,
    }, {
        'url': 'https://www.periscope.tv/bastaakanoggano/1OdKrlkZZjOJX',
        'only_matching': True,
    }, {
        'url': 'https://www.periscope.tv/w/1ZkKzPbMVggJv',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        token = self._match_id(url)

        stream = self._call_api(
            'accessVideoPublic', {'broadcast_id': token}, token)

        broadcast = stream['broadcast']
        info = self._parse_broadcast_data(broadcast, token)

        state = broadcast.get('state').lower()
        width = int_or_none(broadcast.get('width'))
        height = int_or_none(broadcast.get('height'))

        def add_width_and_height(f):
            for key, val in (('width', width), ('height', height)):
                if not f.get(key):
                    f[key] = val

        video_urls = set()
        formats = []
        for format_id in ('replay', 'rtmp', 'hls', 'https_hls', 'lhls', 'lhlsweb'):
            video_url = stream.get(format_id + '_url')
            if not video_url or video_url in video_urls:
                continue
            video_urls.add(video_url)
            if format_id != 'rtmp':
                m3u8_formats = self._extract_pscp_m3u8_formats(
                    video_url, token, format_id, state, width, height, False)
                formats.extend(m3u8_formats)
                continue
            rtmp_format = {
                'url': video_url,
                'ext': 'flv' if format_id == 'rtmp' else 'mp4',
            }
            self._add_width_and_height(rtmp_format, width, height)
            formats.append(rtmp_format)
        info['formats'] = formats
        return info


class PeriscopeUserIE(PeriscopeBaseIE):
    _VALID_URL = r'https?://(?:www\.)?(?:periscope|pscp)\.tv/(?P<id>[^/]+)/?$'
    IE_DESC = 'Periscope user videos'
    IE_NAME = 'periscope:user'

    _TEST = {
        'url': 'https://www.periscope.tv/LularoeHusbandMike/',
        'info_dict': {
            'id': 'LularoeHusbandMike',
            'title': 'LULAROE HUSBAND MIKE',
            'description': 'md5:6cf4ec8047768098da58e446e82c82f0',
        },
        # Periscope only shows videos in the last 24 hours, so it's possible to
        # get 0 videos
        'playlist_mincount': 0,
    }

    def _real_extract(self, url):
        user_name = self._match_id(url)

        webpage = self._download_webpage(url, user_name)

        data_store = self._parse_json(
            unescapeHTML(self._search_regex(
                r'data-store=(["\'])(?P<data>.+?)\1',
                webpage, 'data store', default='{}', group='data')),
            user_name)

        user = next(iter(data_store['UserCache']['users'].values()))['user']
        user_id = user['id']
        session_id = data_store['SessionToken']['public']['broadcastHistory']['token']['session_id']

        broadcasts = self._call_api(
            'getUserBroadcastsPublic',
            {'user_id': user_id, 'session_id': session_id},
            user_name)['broadcasts']

        broadcast_ids = [
            broadcast['id'] for broadcast in broadcasts if broadcast.get('id')]

        title = user.get('display_name') or user.get('username') or user_name
        description = user.get('description')

        entries = [
            self.url_result(
                f'https://www.periscope.tv/{user_name}/{broadcast_id}')
            for broadcast_id in broadcast_ids]

        return self.playlist_result(entries,
                                    user_id, title, description)
yt-dlp-2024.09.27/yt_dlp/extractor/pgatour.py
from .brightcove import BrightcoveNewIE
from .common import InfoExtractor


class PGATourIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?pgatour\.com/video/[\w-]+/(?P<tc>T)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.pgatour.com/video/competition/T6322447785112/adam-hadwin-2023-the-players-round-4-18th-hole-shot-1',
        'info_dict': {
            'id': '6322447785112',
            'ext': 'mp4',
            'title': 'Adam Hadwin | 2023 THE PLAYERS | Round 4 | 18th hole | Shot 1',
            'uploader_id': '6116716431001',
            'upload_date': '20230312',
            'timestamp': 1678653136,
            'duration': 20.011,
            'thumbnail': r're:^https://.+\.jpg',
            'tags': 'count:7',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.pgatour.com/video/features/6322506425112/follow-the-players-trophy-on-championship-sunday',
        'info_dict': {
            'id': '6322506425112',
            'ext': 'mp4',
            'title': 'Follow THE PLAYERS trophy on Championship Sunday',
            'description': 'md5:4d29e4bdfa03694a0ebfd08950398568',
            'uploader_id': '6082840763001',
            'upload_date': '20230313',
            'timestamp': 1678739835,
            'duration': 123.435,
            'thumbnail': r're:^https://.+\.jpg',
            'tags': 'count:8',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        video_id, is_tourcast = self._match_valid_url(url).group('id', 'tc')

        # From https://www.pgatour.com/_next/static/chunks/pages/_app-8bcf849560daf38d.js
        account_id = '6116716431001' if is_tourcast else '6082840763001'
        player_id = 'Vsd5Umu8r' if is_tourcast else 'FWIBYMBPj'

        return self.url_result(
            f'https://players.brightcove.net/{account_id}/{player_id}_default/index.html?videoId={video_id}',
            BrightcoveNewIE)
yt-dlp-2024.09.27/yt_dlp/extractor/philharmoniedeparis.py
from .common import InfoExtractor
from ..utils import try_get


class PhilharmonieDeParisIE(InfoExtractor):
    IE_DESC = 'Philharmonie de Paris'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            live\.philharmoniedeparis\.fr/(?:[Cc]oncert/|embed(?:app)?/|misc/Playlist\.ashx\?id=)|
                            pad\.philharmoniedeparis\.fr/(?:doc/CIMU/|player\.aspx\?id=)|
                            philharmoniedeparis\.fr/fr/live/concert/|
                            otoplayer\.philharmoniedeparis\.fr/fr/embed/
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'https://philharmoniedeparis.fr/fr/live/concert/1129666-danses-symphoniques',
        'md5': '24bdb7e86c200c107680e1f7770330ae',
        'info_dict': {
            'id': '1129666',
            'ext': 'mp4',
            'title': 'Danses symphoniques. Orchestre symphonique Divertimento - Zahia Ziouani. Bizet, de Falla, Stravinski, Moussorgski, Saint-Saëns',
        },
    }, {
        'url': 'https://philharmoniedeparis.fr/fr/live/concert/1032066-akademie-fur-alte-musik-berlin-rias-kammerchor-rene-jacobs-passion-selon-saint-jean-de-johann',
        'info_dict': {
            'id': '1032066',
            'title': 'Akademie für alte Musik Berlin, Rias Kammerchor, René Jacobs : Passion selon saint Jean de Johann Sebastian Bach',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://philharmoniedeparis.fr/fr/live/concert/1030324-orchestre-philharmonique-de-radio-france-myung-whun-chung-renaud-capucon-pascal-dusapin-johannes',
        'only_matching': True,
    }, {
        'url': 'http://live.philharmoniedeparis.fr/misc/Playlist.ashx?id=1030324&track=&lang=fr',
        'only_matching': True,
    }, {
        'url': 'https://live.philharmoniedeparis.fr/embedapp/1098406/berlioz-fantastique-lelio-les-siecles-national-youth-choir-of.html?lang=fr-FR',
        'only_matching': True,
    }, {
        'url': 'https://otoplayer.philharmoniedeparis.fr/fr/embed/1098406?lang=fr-FR',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        config = self._download_json(
            f'https://otoplayer.philharmoniedeparis.fr/fr/config/{video_id}.json', video_id, query={
                'id': video_id,
                'lang': 'fr-FR',
            })

        def extract_entry(source):
            if not isinstance(source, dict):
                return
            title = source.get('title')
            if not title:
                return
            files = source.get('files')
            if not isinstance(files, dict):
                return
            format_urls = set()
            formats = []
            for format_id in ('mobile', 'desktop'):
                format_url = try_get(
                    files, lambda x: x[format_id]['file'], str)
                if not format_url or format_url in format_urls:
                    continue
                format_urls.add(format_url)
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            if not formats and not self.get_param('ignore_no_formats'):
                return
            return {
                'title': title,
                'formats': formats,
                'thumbnail': files.get('thumbnail'),
            }

        info = extract_entry(config)
        if info:
            info.update({
                'id': video_id,
            })
            return info

        entries = []
        for num, chapter in enumerate(config['chapters'], start=1):
            entry = extract_entry(chapter)
            if entry is None:
                continue
            entry['id'] = f'{video_id}-{num}'
            entries.append(entry)

        return self.playlist_result(entries, video_id, config.get('title'))
yt-dlp-2024.09.27/yt_dlp/extractor/phoenix.py
import re

from .youtube import YoutubeIE
from .zdf import ZDFBaseIE
from ..utils import (
    int_or_none,
    merge_dicts,
    try_get,
    unified_timestamp,
    urljoin,
)


class PhoenixIE(ZDFBaseIE):
    IE_NAME = 'phoenix.de'
    _VALID_URL = r'https?://(?:www\.)?phoenix\.de/(?:[^/]+/)*[^/?#&]*-a-(?P<id>\d+)\.html'
    _TESTS = [{
        # Same as https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html
        'url':
'https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html', 'md5': '34ec321e7eb34231fd88616c65c92db0', 'info_dict': { 'id': '210222_phx_nachgehakt_corona_protest', 'ext': 'mp4', 'title': 'Wohin führt der Protest in der Pandemie?', 'description': 'md5:7d643fe7f565e53a24aac036b2122fbd', 'duration': 1691, 'timestamp': 1613902500, 'upload_date': '20210221', 'uploader': 'Phoenix', 'series': 'corona nachgehakt', 'episode': 'Wohin führt der Protest in der Pandemie?', }, }, { # Youtube embed 'url': 'https://www.phoenix.de/sendungen/gespraeche/phoenix-streitgut-brennglas-corona-a-1965505.html', 'info_dict': { 'id': 'hMQtqFYjomk', 'ext': 'mp4', 'title': 'phoenix streitgut: Brennglas Corona - Wie gerecht ist unsere Gesellschaft?', 'description': 'md5:ac7a02e2eb3cb17600bc372e4ab28fdd', 'duration': 3509, 'upload_date': '20201219', 'uploader': 'phoenix', 'uploader_id': 'phoenix', }, 'params': { 'skip_download': True, }, }, { 'url': 'https://www.phoenix.de/entwicklungen-in-russland-a-2044720.html', 'only_matching': True, }, { # no media 'url': 'https://www.phoenix.de/sendungen/dokumentationen/mit-dem-jumbo-durch-die-nacht-a-89625.html', 'only_matching': True, }, { # Same as https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html 'url': 'https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche', 'only_matching': True, }] def _real_extract(self, url): article_id = self._match_id(url) article = self._download_json( f'https://www.phoenix.de/response/id/{article_id}', article_id, 'Downloading article JSON') video = article['absaetze'][0] title = video.get('titel') or article.get('subtitel') if video.get('typ') == 'video-youtube': video_id = video['id'] return self.url_result( video_id, ie=YoutubeIE.ie_key(), video_id=video_id, video_title=title) video_id = str(video.get('basename') or video.get('content')) details = self._download_json( 'https://www.phoenix.de/php/mediaplayer/data/beitrags_details.php', video_id, 'Downloading details JSON', query={ 'ak': 'web', 'ptmd': 'true', 'id': video_id, 'profile': 'player2', }) title = title or details['title'] content_id = details['tracking']['nielsen']['content']['assetid'] info = self._extract_ptmd( f'https://tmd.phoenix.de/tmd/2/ngplayer_2_3/vod/ptmd/phoenix/{content_id}', content_id, None, url) duration = int_or_none(try_get( details, lambda x: x['tracking']['nielsen']['content']['length'])) timestamp = unified_timestamp(details.get('editorialDate')) series = try_get( details, lambda x: x['tracking']['nielsen']['content']['program'], str) episode = title if details.get('contentType') == 'episode' else None thumbnails = [] teaser_images = try_get(details, lambda x: x['teaserImageRef']['layouts'], dict) or {} for thumbnail_key, thumbnail_url in teaser_images.items(): thumbnail_url = urljoin(url, thumbnail_url) if not thumbnail_url: continue thumbnail = { 'url': thumbnail_url, } m = re.match('^([0-9]+)x([0-9]+)$', thumbnail_key) if m: thumbnail['width'] = int(m.group(1)) thumbnail['height'] = int(m.group(2)) thumbnails.append(thumbnail) return merge_dicts(info, { 'id': content_id, 'title': title, 'description': details.get('leadParagraph'), 'duration': duration, 'thumbnails': thumbnails, 'timestamp': timestamp, 'uploader': details.get('tvService'), 'series': series, 'episode': episode, }) 
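

# Illustrative sketch (not part of the extractor above): the thumbnail loop
# in _real_extract expects ZDF-style layout keys such as '640x360', and falls
# back to URL-only entries for keys like 'original' that don't encode a size.
# The base URL and layout data below are hypothetical.
def _example_parse_teaser_layouts(base_url, layouts):
    thumbnails = []
    for key, path in layouts.items():
        thumbnail = {'url': urljoin(base_url, path)}
        m = re.match('^([0-9]+)x([0-9]+)$', key)
        if m:
            thumbnail['width'] = int(m.group(1))
            thumbnail['height'] = int(m.group(2))
        thumbnails.append(thumbnail)
    return thumbnails


# _example_parse_teaser_layouts('https://www.phoenix.de/', {'640x360': 'a.jpg'})
# -> [{'url': 'https://www.phoenix.de/a.jpg', 'width': 640, 'height': 360}]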
yt-dlp-2024.09.27/yt_dlp/extractor/photobucket.py
import json
import urllib.parse

from .common import InfoExtractor


class PhotobucketIE(InfoExtractor):
    _VALID_URL = r'https?://(?:[a-z0-9]+\.)?photobucket\.com/.*(([\?\&]current=)|_)(?P<id>.*)\.(?P<ext>(flv)|(mp4))'
    _TEST = {
        'url': 'http://media.photobucket.com/user/rachaneronas/media/TiredofLinkBuildingTryBacklinkMyDomaincom_zpsc0c3b9fa.mp4.html?filters[term]=search&filters[primary]=videos&filters[secondary]=images&sort=1&o=0',
        'md5': '7dabfb92b0a31f6c16cebc0f8e60ff99',
        'info_dict': {
            'id': 'zpsc0c3b9fa',
            'ext': 'mp4',
            'timestamp': 1367669341,
            'upload_date': '20130504',
            'uploader': 'rachaneronas',
            'title': 'Tired of Link Building? Try BacklinkMyDomain.com!',
        },
    }

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        video_extension = mobj.group('ext')

        webpage = self._download_webpage(url, video_id)

        # Extract URL, uploader, and title from webpage
        self.report_extraction(video_id)
        info_json = self._search_regex(r'Pb\.Data\.Shared\.put\(Pb\.Data\.Shared\.MEDIA, (.*?)\);',
                                       webpage, 'info json')
        info = json.loads(info_json)
        url = urllib.parse.unquote(self._html_search_regex(r'file=(.+\.mp4)', info['linkcodes']['html'], 'url'))
        return {
            'id': video_id,
            'url': url,
            'uploader': info['username'],
            'timestamp': info['creationDate'],
            'title': info['title'],
            'ext': video_extension,
            'thumbnail': info['thumbUrl'],
        }
yt-dlp-2024.09.27/yt_dlp/extractor/piapro.py
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    clean_html,
    get_element_by_class,
    parse_duration,
    parse_filesize,
    str_to_int,
    unified_timestamp,
    urlencode_postdata,
)


class PiaproIE(InfoExtractor):
    _NETRC_MACHINE = 'piapro'
    _VALID_URL = r'https?://piapro\.jp/(?:t|content)/(?P<id>[\w-]+)/?'
_TESTS = [{ 'url': 'https://piapro.jp/t/NXYR', 'md5': 'f7c0f760913fb1d44a1c45a4af793909', 'info_dict': { 'id': 'NXYR', 'ext': 'mp3', 'uploader': 'wowaka', 'uploader_id': 'wowaka', 'title': '裏表ラバーズ', 'description': 'http://www.nicovideo.jp/watch/sm8082467', 'duration': 189.0, 'timestamp': 1251785475, 'thumbnail': r're:^https?://.*\.(?:png|jpg)$', 'upload_date': '20090901', 'view_count': int, }, }, { 'note': 'There are break lines in description, mandating (?s) flag', 'url': 'https://piapro.jp/t/9cSd', 'md5': '952bb6d1e8de95050206408a87790676', 'info_dict': { 'id': '9cSd', 'ext': 'mp3', 'title': '青に溶けた風船 / 初音ミク', 'description': 'md5:d395a9bd151447631a5a1460bc7f9132', 'uploader': 'シアン・キノ', 'duration': 229.0, 'timestamp': 1644030039, 'upload_date': '20220205', 'view_count': int, 'thumbnail': r're:^https?://.*\.(?:png|jpg)$', 'uploader_id': 'cyankino', }, }, { 'url': 'https://piapro.jp/content/hcw0z3a169wtemz6', 'only_matching': True, }, { 'url': 'https://piapro.jp/t/-SO-', 'only_matching': True, }] _login_status = False def _perform_login(self, username, password): login_ok = True login_form_strs = { '_username': username, '_password': password, '_remember_me': 'on', 'login': 'ログイン', } self._request_webpage('https://piapro.jp/login/', None) urlh = self._request_webpage( 'https://piapro.jp/login/exe', None, note='Logging in', errnote='Unable to log in', data=urlencode_postdata(login_form_strs)) if urlh is False: login_ok = False else: parts = urllib.parse.urlparse(urlh.url) if parts.path != '/': login_ok = False if not login_ok: self.report_warning( 'unable to log in: bad username or password') self._login_status = login_ok def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) category_id = self._search_regex(r'categoryId=(.+)">', webpage, 'category ID') if category_id not in ('1', '2', '21', '22', '23', '24', '25'): raise ExtractorError('The URL does not contain audio.', expected=True) def extract_info(name, description): return self._search_regex(rf'{name}[::]\s*([\d\s,:/]+)\s*</p>', webpage, description, default=None) return { 'id': video_id, 'title': clean_html(get_element_by_class('contents_title', webpage)), 'description': clean_html(get_element_by_class('contents_description', webpage)), 'uploader': clean_html(get_element_by_class('contents_creator_txt', webpage)), 'uploader_id': self._search_regex( r'<a\s+href="/([^"]+)"', get_element_by_class('contents_creator', webpage), 'uploader id', default=None), 'timestamp': unified_timestamp(extract_info('投稿日', 'timestamp'), False), 'duration': parse_duration(extract_info('長さ', 'duration')), 'view_count': str_to_int(extract_info('閲覧数', 'view count')), 'thumbnail': self._html_search_meta('twitter:image', webpage), 'filesize_approx': parse_filesize((extract_info('サイズ', 'size') or '').replace(',', '')), 'url': self._search_regex(r'\"url\":\s*\"(.*?)\"', webpage, 'url'), 'ext': 'mp3', 'vcodec': 'none', } 
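

# Sketch of the login check in _perform_login above: piapro.jp redirects to
# the site root on a successful login, so any other landing path is treated
# as a failure. The URLs below are hypothetical examples.
def _example_login_succeeded(final_url):
    return urllib.parse.urlparse(final_url).path == '/'


# _example_login_succeeded('https://piapro.jp/')        -> True
# _example_login_succeeded('https://piapro.jp/login/')  -> False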
yt-dlp-2024.09.27/yt_dlp/extractor/piaulizaportal.py
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_qs,
    time_seconds,
    traverse_obj,
)


class PIAULIZAPortalIE(InfoExtractor):
    IE_DESC = 'ulizaportal.jp - PIA LIVE STREAM'
    _VALID_URL = r'https?://(?:www\.)?ulizaportal\.jp/pages/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
    _TESTS = [{
        'url': 'https://ulizaportal.jp/pages/005f18b7-e810-5618-cb82-0987c5755d44',
        'info_dict': {
            'id': '005f18b7-e810-5618-cb82-0987c5755d44',
            'title': 'プレゼンテーションプレイヤーのサンプル',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }, {
        'url': 'https://ulizaportal.jp/pages/005e1b23-fe93-5780-19a0-98e917cc4b7d?expires=4102412400&signature=f422a993b683e1068f946caf406d211c17d1ef17da8bef3df4a519502155aa91&version=1',
        'info_dict': {
            'id': '005e1b23-fe93-5780-19a0-98e917cc4b7d',
            'title': '【確認用】視聴サンプルページ(ULIZA)',
            'live_status': 'not_live',
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
        if expires and expires <= time_seconds():
            raise ExtractorError('The link is expired.', video_id=video_id, expected=True)

        webpage = self._download_webpage(url, video_id)

        player_data = self._download_webpage(
            self._search_regex(
                r'<script [^>]*\bsrc="(https://player-api\.p\.uliza\.jp/v1/players/[^"]+)"',
                webpage, 'player data url'),
            video_id, headers={'Referer': 'https://ulizaportal.jp/'},
            note='Fetching player data', errnote='Unable to fetch player data')

        formats = self._extract_m3u8_formats(
            self._search_regex(
                r'["\'](https://vms-api\.p\.uliza\.jp/v1/prog-index\.m3u8[^"\']+)',
                player_data, 'm3u8 url', default=None),
            video_id, fatal=False)
        m3u8_type = self._search_regex(
            r'/hls/(dvr|video)/', traverse_obj(formats, (0, 'url')), 'm3u8 type', default=None)

        return {
            'id': video_id,
            'title': self._html_extract_title(webpage),
            'formats': formats,
            'live_status': {
                'video': 'is_live',
                'dvr': 'was_live',  # short-term archives
            }.get(m3u8_type, 'not_live'),  # VOD or long-term archives
        }
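

# Sketch of the expiry guard in _real_extract above: signed links carry a
# Unix-timestamp `expires` query parameter, and links without one never
# expire here. The URL below is a hypothetical example.
def _example_link_expired(url):
    expires = int_or_none(traverse_obj(parse_qs(url), ('expires', 0)))
    return bool(expires) and expires <= time_seconds()


# _example_link_expired('https://ulizaportal.jp/pages/x?expires=4102412400')
# -> False until the sample timestamp (far in the future) passes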
yt-dlp-2024.09.27/yt_dlp/extractor/picarto.py
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    str_or_none,
    traverse_obj,
    update_url,
)


class PicartoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www.)?picarto\.tv/(?P<id>[a-zA-Z0-9]+)'
    _TEST = {
        'url': 'https://picarto.tv/Setz',
        'info_dict': {
            'id': 'Setz',
            'ext': 'mp4',
            'title': 're:^Setz [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'timestamp': int,
            'is_live': True,
        },
        'skip': 'Stream is offline',
    }

    @classmethod
    def suitable(cls, url):
        return False if PicartoVodIE.suitable(url) else super().suitable(url)

    def _real_extract(self, url):
        channel_id = self._match_id(url)

        data = self._download_json(
            'https://ptvintern.picarto.tv/ptvapi', channel_id, query={
                'query': '''{
  channel(name: "%s") {
    adult
    id
    online
    stream_name
    title
  }
  getLoadBalancerUrl(channel_name: "%s") {
    url
  }
}''' % (channel_id, channel_id),  # noqa: UP031
            }, headers={'Accept': '*/*', 'Content-Type': 'application/json'})['data']

        metadata = data['channel']
        if metadata.get('online') == 0:
            raise ExtractorError('Stream is offline', expected=True)
        title = metadata['title']

        cdn_data = self._download_json(''.join((
            update_url(data['getLoadBalancerUrl']['url'], scheme='https'),
            '/stream/json_', metadata['stream_name'], '.js')),
            channel_id, 'Downloading load balancing info')

        formats = []
        for source in (cdn_data.get('source') or []):
            source_url = source.get('url')
            if not source_url:
                continue
            source_type = source.get('type')
            if source_type == 'html5/application/vnd.apple.mpegurl':
                formats.extend(self._extract_m3u8_formats(
                    source_url, channel_id, 'mp4', m3u8_id='hls', fatal=False))
            elif source_type == 'html5/video/mp4':
                formats.append({
                    'url': source_url,
                })

        mature = metadata.get('adult')
        if mature is None:
            age_limit = None
        else:
            age_limit = 18 if mature is True else 0

        return {
            'id': channel_id,
            'title': title.strip(),
            'is_live': True,
            'channel': channel_id,
            'channel_id': metadata.get('id'),
            'channel_url': f'https://picarto.tv/{channel_id}',
            'age_limit': age_limit,
            'formats': formats,
        }


class PicartoVodIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?picarto\.tv/(?:videopopout|\w+/videos)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://picarto.tv/videopopout/ArtofZod_2017.12.12.00.13.23.flv',
        'md5': '3ab45ba4352c52ee841a28fb73f2d9ca',
        'info_dict': {
            'id': 'ArtofZod_2017.12.12.00.13.23.flv',
            'ext': 'mp4',
            'title': 'ArtofZod_2017.12.12.00.13.23.flv',
            'thumbnail': r're:^https?://.*\.jpg',
        },
        'skip': 'The VOD does not exist',
    }, {
        'url': 'https://picarto.tv/ArtofZod/videos/771008',
        'md5': 'abef5322f2700d967720c4c6754b2a34',
        'info_dict': {
            'id': '771008',
            'ext': 'mp4',
            'title': 'Art of Zod - Drawing and Painting',
            'thumbnail': r're:^https?://.*\.jpg',
'ArtofZod', 'age_limit': 18, }, }, { 'url': 'https://picarto.tv/videopopout/Plague', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) data = self._download_json( 'https://ptvintern.picarto.tv/ptvapi', video_id, query={ 'query': f'''{{ video(id: "{video_id}") {{ id title adult file_name video_recording_image_url channel {{ name }} }} }}''', }, headers={'Accept': '*/*', 'Content-Type': 'application/json'})['data']['video'] file_name = data['file_name'] netloc = urllib.parse.urlparse(data['video_recording_image_url']).netloc formats = self._extract_m3u8_formats( f'https://{netloc}/stream/hls/{file_name}/index.m3u8', video_id, 'mp4', m3u8_id='hls') return { 'id': video_id, **traverse_obj(data, { 'id': ('id', {str_or_none}), 'title': ('title', {str}), 'thumbnail': 'video_recording_image_url', 'channel': ('channel', 'name', {str}), 'age_limit': ('adult', {lambda x: 18 if x else 0}), }), 'formats': formats, } �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/piksel.py��������������������������������������������������������0000664�0000000�0000000�00000016046�14675634471�0020531�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re from .common import InfoExtractor from ..utils import ( ExtractorError, dict_get, int_or_none, join_nonempty, parse_iso8601, traverse_obj, try_get, unescapeHTML, urljoin, ) class PikselIE(InfoExtractor): _VALID_URL = r'''(?x)https?:// (?: (?: player\. (?: olympusattelecom| vibebyvista )| (?:api|player)\.multicastmedia| (?:api-ovp|player)\.piksel )\.(?:com|tech)| (?: mz-edge\.stream\.co| movie-s\.nhk\.or )\.jp| vidego\.baltimorecity\.gov )/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)''' _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.(?:com|tech)/v/[a-z0-9]+)'] _TESTS = [ { 'url': 'http://player.piksel.tech/v/ums2867l', 'md5': '34e34c8d89dc2559976a6079db531e85', 'info_dict': { 'id': 'ums2867l', 'ext': 'mp4', 'title': 'GX-005 with Caption', 'timestamp': 1481335659, 'upload_date': '20161210', 'description': '', 'thumbnail': 'https://thumbs.piksel.tech/thumbs/aid/t1488331553/3238987.jpg?w=640&h=480', }, }, { # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al 'url': 'https://player.piksel.tech/v/v80kqp41', 'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d', 'info_dict': { 'id': 'v80kqp41', 'ext': 'mp4', 'title': 'WAW- State of Washington vs. Donald J. Trump, et al', 'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. 
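Both Picarto extractors above fetch metadata by sending a GraphQL document as a `query` URL parameter to `https://ptvintern.picarto.tv/ptvapi`. A minimal standalone sketch of that request shape, using only the standard library (the channel name is an illustrative placeholder, and whether the endpoint accepts such a bare request outside the extractor is an assumption):

import json
import urllib.parse
import urllib.request

# Hypothetical channel name, for illustration only
gql = '{ channel(name: "Setz") { adult id online stream_name title } }'
api_url = 'https://ptvintern.picarto.tv/ptvapi?' + urllib.parse.urlencode({'query': gql})
req = urllib.request.Request(api_url, headers={'Accept': '*/*', 'Content-Type': 'application/json'})
with urllib.request.urlopen(req) as resp:
    print(json.load(resp)['data']['channel'])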
yt-dlp-2024.09.27/yt_dlp/extractor/piksel.py

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    dict_get,
    int_or_none,
    join_nonempty,
    parse_iso8601,
    traverse_obj,
    try_get,
    unescapeHTML,
    urljoin,
)


class PikselIE(InfoExtractor):
    _VALID_URL = r'''(?x)https?://
        (?:
            (?:
                player\.
                    (?:
                        olympusattelecom|
                        vibebyvista
                    )|
                (?:api|player)\.multicastmedia|
                (?:api-ovp|player)\.piksel
            )\.(?:com|tech)|
            (?:
                mz-edge\.stream\.co|
                movie-s\.nhk\.or
            )\.jp|
            vidego\.baltimorecity\.gov
        )/v/(?:refid/(?P<refid>[^/]+)/prefid/)?(?P<id>[\w-]+)'''
    _EMBED_REGEX = [r'<iframe[^>]+src=["\'](?P<url>(?:https?:)?//player\.piksel\.(?:com|tech)/v/[a-z0-9]+)']
    _TESTS = [
        {
            'url': 'http://player.piksel.tech/v/ums2867l',
            'md5': '34e34c8d89dc2559976a6079db531e85',
            'info_dict': {
                'id': 'ums2867l',
                'ext': 'mp4',
                'title': 'GX-005 with Caption',
                'timestamp': 1481335659,
                'upload_date': '20161210',
                'description': '',
                'thumbnail': 'https://thumbs.piksel.tech/thumbs/aid/t1488331553/3238987.jpg?w=640&h=480',
            },
        },
        {
            # Original source: http://www.uscourts.gov/cameras-courts/state-washington-vs-donald-j-trump-et-al
            'url': 'https://player.piksel.tech/v/v80kqp41',
            'md5': '753ddcd8cc8e4fa2dda4b7be0e77744d',
            'info_dict': {
                'id': 'v80kqp41',
                'ext': 'mp4',
                'title': 'WAW- State of Washington vs. Donald J. Trump, et al',
                'description': 'State of Washington vs. Donald J. Trump, et al, Case Number 17-CV-00141-JLR, TRO Hearing, Civil Rights Case, 02/3/2017, 1:00 PM (PST), Seattle Federal Courthouse, Seattle, WA, Judge James L. Robart presiding.',
                'timestamp': 1486171129,
                'upload_date': '20170204',
                'thumbnail': 'https://thumbs.piksel.tech/thumbs/aid/t1495569155/3279887.jpg?w=640&h=360',
            },
        },
        {
            # https://www3.nhk.or.jp/nhkworld/en/ondemand/video/2019240/
            'url': 'http://player.piksel.com/v/refid/nhkworld/prefid/nw_vod_v_en_2019_240_20190823233000_02_1566873477',
            'only_matching': True,
        },
    ]

    def _call_api(self, app_token, resource, display_id, query, host='https://player.piksel.tech', fatal=True):
        url = urljoin(host, f'/ws/ws_{resource}/api/{app_token}/mode/json/apiv/5')
        response = traverse_obj(
            self._download_json(url, display_id, query=query, fatal=fatal),
            ('response', {dict})) or {}
        failure = traverse_obj(response, ('failure', 'reason')) if response else 'Empty response from API'
        if failure:
            if fatal:
                raise ExtractorError(failure, expected=True)
            self.report_warning(failure)
        return response

    def _real_extract(self, url):
        ref_id, display_id = self._match_valid_url(url).groups()
        webpage = self._download_webpage(url, display_id)
        app_token = self._search_regex([
            r'clientAPI\s*:\s*"([^"]+)"',
            r'data-de-api-key\s*=\s*"([^"]+)"',
        ], webpage, 'app token')
        query = {'refid': ref_id, 'prefid': display_id} if ref_id else {'v': display_id}
        program = self._call_api(
            app_token, 'program', display_id, query, url)['WsProgramResponse']['program']
        video_id = program['uuid']
        video_data = program['asset']
        title = video_data['title']
        asset_type = dict_get(video_data, ['assetType', 'asset_type'])

        formats = []

        def process_asset_file(asset_file):
            if not asset_file:
                return
            # TODO: extract rtmp formats
            http_url = asset_file.get('http_url')
            if not http_url:
                return

            tbr = None
            vbr = int_or_none(asset_file.get('videoBitrate'), 1024)
            abr = int_or_none(asset_file.get('audioBitrate'), 1024)
            if asset_type == 'video':
                tbr = vbr + abr
            elif asset_type == 'audio':
                tbr = abr

            formats.append({
                'format_id': join_nonempty('http', tbr),
                'url': unescapeHTML(http_url),
                'vbr': vbr,
                'abr': abr,
                'width': int_or_none(asset_file.get('videoWidth')),
                'height': int_or_none(asset_file.get('videoHeight')),
                'filesize': int_or_none(asset_file.get('filesize')),
                'tbr': tbr,
            })

        def process_asset_files(asset_files):
            for asset_file in (asset_files or []):
                process_asset_file(asset_file)

        process_asset_files(video_data.get('assetFiles'))
        process_asset_file(video_data.get('referenceFile'))
        if not formats:
            asset_id = video_data.get('assetid') or program.get('assetid')
            if asset_id:
                process_asset_files(try_get(self._call_api(
                    app_token, 'asset_file', display_id, {
                        'assetid': asset_id,
                    }, url, False),
                    lambda x: x['WsAssetFileResponse']['AssetFiles']))

        m3u8_url = dict_get(video_data, [
            'm3u8iPadURL',
            'ipadM3u8Url',
            'm3u8AndroidURL',
            'm3u8iPhoneURL',
            'iphoneM3u8Url'])
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, video_id, 'mp4', 'm3u8_native',
                m3u8_id='hls', fatal=False))

        smil_url = dict_get(video_data, ['httpSmil', 'hdSmil', 'rtmpSmil'])
        if smil_url:
            transform_source = lambda x: x.replace('src="/', 'src="')
            if ref_id == 'nhkworld':
                # TODO: figure out if this is something to be fixed in urljoin,
                # _parse_smil_formats or keep it here
                transform_source = lambda x: x.replace('src="/', 'src="').replace('/media"', '/media/"')
            formats.extend(self._extract_smil_formats(
                re.sub(r'/od/[^/]+/', '/od/http/', smil_url), video_id,
                transform_source=transform_source, fatal=False))

        subtitles = {}
        for caption in video_data.get('captions', []):
            caption_url = caption.get('url')
            if caption_url:
                subtitles.setdefault(caption.get('locale', 'en'), []).append({
                    'url': caption_url})

        return {
            'id': video_id,
            'title': title,
            'description': video_data.get('description'),
            'thumbnail': video_data.get('thumbnailUrl'),
            'timestamp': parse_iso8601(video_data.get('dateadd')),
            'formats': formats,
            'subtitles': subtitles,
            '_format_sort_fields': ('tbr', ),  # Incomplete resolution information
        }
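`PikselIE._call_api` above derives every API endpoint from a fixed path template keyed by resource name and the app token scraped from the player page. A small sketch of just that URL composition (the token value is a hypothetical placeholder):

from urllib.parse import urljoin

def piksel_api_url(app_token, resource, host='https://player.piksel.tech'):
    # Mirrors the URL shape built by PikselIE._call_api
    return urljoin(host, f'/ws/ws_{resource}/api/{app_token}/mode/json/apiv/5')

# piksel_api_url('TOKEN', 'program')
# -> 'https://player.piksel.tech/ws/ws_program/api/TOKEN/mode/json/apiv/5'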
yt-dlp-2024.09.27/yt_dlp/extractor/pinkbike.py

import re

from .common import InfoExtractor
from ..utils import (
    int_or_none,
    remove_end,
    remove_start,
    str_to_int,
    unified_strdate,
)


class PinkbikeIE(InfoExtractor):
    _VALID_URL = r'https?://(?:(?:www\.)?pinkbike\.com/video/|es\.pinkbike\.org/i/kvid/kvid-y5\.swf\?id=)(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://www.pinkbike.com/video/402811/',
        'md5': '4814b8ca7651034cd87e3361d5c2155a',
        'info_dict': {
            'id': '402811',
            'ext': 'mp4',
            'title': 'Brandon Semenuk - RAW 100',
            'description': 'Official release: www.redbull.ca/rupertwalker',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 100,
            'upload_date': '20150406',
            'uploader': 'revelco',
            'location': 'Victoria, British Columbia, Canada',
            'view_count': int,
            'comment_count': int,
        },
    }, {
        'url': 'http://es.pinkbike.org/i/kvid/kvid-y5.swf?id=406629',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            f'http://www.pinkbike.com/video/{video_id}', video_id)

        formats = []
        for _, format_id, src in re.findall(
                r'data-quality=((?:\\)?["\'])(.+?)\1[^>]+src=\1(.+?)\1', webpage):
            height = int_or_none(self._search_regex(
                r'^(\d+)[pP]$', format_id, 'height', default=None))
            formats.append({
                'url': src,
                'format_id': format_id,
                'height': height,
            })

        title = remove_end(self._og_search_title(webpage), ' Video - Pinkbike')
        description = self._html_search_regex(
            r'(?s)id="media-description"[^>]*>(.+?)<',
            webpage, 'description', default=None) or remove_start(
            self._og_search_description(webpage), title + '. ')
        thumbnail = self._og_search_thumbnail(webpage)
        duration = int_or_none(self._html_search_meta(
            'video:duration', webpage, 'duration'))

        uploader = self._search_regex(
            r'<a[^>]+\brel=["\']author[^>]+>([^<]+)', webpage,
            'uploader', fatal=False)
        upload_date = unified_strdate(self._search_regex(
            r'class="fullTime"[^>]+title="([^"]+)"',
            webpage, 'upload date', fatal=False))

        location = self._html_search_regex(
            r'(?s)<dt>Location</dt>\s*<dd>(.+?)<',
            webpage, 'location', fatal=False)

        def extract_count(webpage, label):
            return str_to_int(self._search_regex(
                rf'<span[^>]+class="stat-num"[^>]*>([\d,.]+)</span>\s*<span[^>]+class="stat-label"[^>]*>{label}',
                webpage, label, fatal=False))

        view_count = extract_count(webpage, 'Views')
        comment_count = extract_count(webpage, 'Comments')

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'upload_date': upload_date,
            'uploader': uploader,
            'location': location,
            'view_count': view_count,
            'comment_count': comment_count,
            'formats': formats,
        }

yt-dlp-2024.09.27/yt_dlp/extractor/pinterest.py

import json

from .common import InfoExtractor
from ..utils import (
    determine_ext,
    float_or_none,
    int_or_none,
    str_or_none,
    strip_or_none,
    traverse_obj,
    unified_timestamp,
    url_or_none,
)


class PinterestBaseIE(InfoExtractor):
    _VALID_URL_BASE = r'''(?x)
        https?://(?:[^/]+\.)?pinterest\.(?:
            com|fr|de|ch|jp|cl|ca|it|co\.uk|nz|ru|com\.au|at|pt|co\.kr|es|com\.mx|
            dk|ph|th|com\.uy|co|nl|info|kr|ie|vn|com\.vn|ec|mx|in|pe|co\.at|hu|
            co\.in|co\.nz|id|com\.ec|com\.py|tw|be|uk|com\.bo|com\.pe)'''

    def _call_api(self, resource, video_id, options):
        return self._download_json(
            f'https://www.pinterest.com/resource/{resource}Resource/get/',
            video_id, f'Download {resource} JSON metadata', query={
                'data': json.dumps({'options': options}),
            })['resource_response']

    def _extract_video(self, data, extract_formats=True):
        video_id = data['id']
        thumbnails = []
        images = data.get('images')
        if isinstance(images, dict):
            for thumbnail in images.values():
                if not isinstance(thumbnail, dict):
                    continue
                thumbnail_url = url_or_none(thumbnail.get('url'))
                if not thumbnail_url:
                    continue
                thumbnails.append({
                    'url': thumbnail_url,
                    'width': int_or_none(thumbnail.get('width')),
                    'height': int_or_none(thumbnail.get('height')),
                })

        info = {
            'title': strip_or_none(traverse_obj(data, 'title', 'grid_title', default='')),
            'description': traverse_obj(data, 'seo_description', 'description'),
            'timestamp': unified_timestamp(data.get('created_at')),
            'thumbnails': thumbnails,
            'uploader': traverse_obj(data, ('closeup_attribution', 'full_name')),
            'uploader_id': str_or_none(traverse_obj(data, ('closeup_attribution', 'id'))),
            'repost_count': int_or_none(data.get('repin_count')),
            'comment_count': int_or_none(data.get('comment_count')),
            'categories': traverse_obj(data, ('pin_join', 'visual_annotation'), expected_type=list),
            'tags': traverse_obj(data, 'hashtags', expected_type=list),
        }

        urls = []
        formats = []
        duration = None
        domain = data.get('domain', '')
        if domain.lower() != 'uploaded by user' and traverse_obj(data, ('embed', 'src')):
            if not info['title']:
                info['title'] = None
            return {
                '_type': 'url_transparent',
                'url': data['embed']['src'],
                **info,
            }

        elif extract_formats:
            video_list = traverse_obj(
                data, ('videos', 'video_list'),
                ('story_pin_data', 'pages', ..., 'blocks', ..., 'video', 'video_list'),
                expected_type=dict, get_all=False, default={})
            for format_id, format_dict in video_list.items():
                if not isinstance(format_dict, dict):
                    continue
                format_url = url_or_none(format_dict.get('url'))
                if not format_url or format_url in urls:
                    continue
                urls.append(format_url)
                duration = float_or_none(format_dict.get('duration'), scale=1000)
                ext = determine_ext(format_url)
                if 'hls' in format_id.lower() or ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id=format_id, fatal=False))
                else:
                    formats.append({
                        'url': format_url,
                        'format_id': format_id,
                        'width': int_or_none(format_dict.get('width')),
                        'height': int_or_none(format_dict.get('height')),
                        'duration': duration,
                    })

        return {
            'id': video_id,
            'formats': formats,
            'duration': duration,
            'webpage_url': f'https://www.pinterest.com/pin/{video_id}/',
            'extractor_key': PinterestIE.ie_key(),
            'extractor': PinterestIE.IE_NAME,
            **info,
        }


class PinterestIE(PinterestBaseIE):
    _VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/pin/(?:[\w-]+--)?(?P<id>\d+)'
    _TESTS = [{
        # formats found in data['videos']
        'url': 'https://www.pinterest.com/pin/664281013778109217/',
        'md5': '6550c2af85d6d9f3fe3b88954d1577fc',
        'info_dict': {
            'id': '664281013778109217',
            'ext': 'mp4',
            'title': 'Origami',
            'description': 'md5:e29801cab7d741ea8c741bc50c8d00ab',
            'duration': 57.7,
            'timestamp': 1593073622,
            'upload_date': '20200625',
            'repost_count': int,
            'comment_count': int,
            'categories': list,
            'tags': list,
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
        },
    }, {
        # formats found in data['story_pin_data']
        'url': 'https://www.pinterest.com/pin/1084663891475263837/',
        'md5': '069ac19919ab9e1e13fa60de46290b03',
        'info_dict': {
            'id': '1084663891475263837',
            'ext': 'mp4',
            'title': 'Gadget, Cool products, Amazon product, technology, Kitchen gadgets',
            'description': 'md5:d0a4b6ae996ff0c6eed83bc869598d13',
            'uploader': 'CoolCrazyGadgets',
            'uploader_id': '1084664028912989237',
            'upload_date': '20211003',
            'timestamp': 1633246654.0,
            'duration': 14.9,
            'comment_count': int,
            'repost_count': int,
            'categories': 'count:9',
            'tags': list,
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
        },
    }, {
        # vimeo.com embed
        'url': 'https://www.pinterest.ca/pin/441282463481903715/',
        'info_dict': {
            'id': '111691128',
            'ext': 'mp4',
            'title': 'Tonite Let\'s All Make Love In London (1967)',
            'description': 'md5:8190f37b3926807809ec57ec21aa77b2',
            'uploader': 'Vimeo',
            'uploader_id': '473792960706651251',
            'upload_date': '20180120',
            'timestamp': 1516409040,
            'duration': 3404,
            'comment_count': int,
            'repost_count': int,
            'categories': 'count:9',
            'tags': [],
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
            'uploader_url': 'https://vimeo.com/willardandrade',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://co.pinterest.com/pin/824721750502199491/',
        'only_matching': True,
    }, {
        'url': 'https://pinterest.com/pin/dive-into-serenity-blue-lagoon-pedi-nails-for-a-tranquil-and-refreshing-spa-experience-video-in-2024--2885187256207927',
        'info_dict': {
            'id': '2885187256207927',
            'ext': 'mp4',
            'title': 'Dive into Serenity: Blue Lagoon Pedi Nails for a Tranquil and Refreshing Spa Experience! 💙💅',
            'description': 'md5:5da41c767d2317e42e49b663b0b2150f',
            'uploader': 'Glamour Artistry |Everyday Outfits, Luxury Fashion & Nail Designs',
            'uploader_id': '1142999717836434688',
            'upload_date': '20240702',
            'timestamp': 1719939156,
            'duration': 7.967,
            'comment_count': int,
            'repost_count': int,
            'categories': 'count:9',
            'tags': ['#BlueLagoonPediNails', '#SpaExperience'],
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        data = self._call_api(
            'Pin', video_id, {
                'field_set_key': 'unauth_react_main_pin',
                'id': video_id,
            })['data']
        return self._extract_video(data)


class PinterestCollectionIE(PinterestBaseIE):
    _VALID_URL = rf'{PinterestBaseIE._VALID_URL_BASE}/(?P<username>[^/]+)/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://www.pinterest.ca/mashal0407/cool-diys/',
        'info_dict': {
            'id': '585890301462791043',
            'title': 'cool diys',
        },
        'playlist_count': 8,
    }, {
        'url': 'https://www.pinterest.ca/fudohub/videos/',
        'info_dict': {
            'id': '682858430939307450',
            'title': 'VIDEOS',
        },
        'playlist_mincount': 365,
        'skip': 'Test with extract_formats=False',
    }]

    @classmethod
    def suitable(cls, url):
        return False if PinterestIE.suitable(url) else super().suitable(url)

    def _real_extract(self, url):
        username, slug = self._match_valid_url(url).groups()
        board = self._call_api(
            'Board', slug, {
                'slug': slug,
                'username': username,
            })['data']
        board_id = board['id']
        options = {
            'board_id': board_id,
            'page_size': 250,
        }
        bookmark = None
        entries = []
        while True:
            if bookmark:
                options['bookmarks'] = [bookmark]
            board_feed = self._call_api('BoardFeed', board_id, options)
            for item in (board_feed.get('data') or []):
                if not isinstance(item, dict) or item.get('type') != 'pin':
                    continue
                video_id = item.get('id')
                if video_id:
                    # Some pins may not be available anonymously via pin URL
                    # video = self._extract_video(item, extract_formats=False)
                    # video.update({
                    #     '_type': 'url_transparent',
                    #     'url': 'https://www.pinterest.com/pin/%s/' % video_id,
                    # })
                    # entries.append(video)
                    entries.append(self._extract_video(item))
            bookmark = board_feed.get('bookmark')
            if not bookmark:
                break
        return self.playlist_result(
            entries, playlist_id=board_id, playlist_title=board.get('name'))
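`PinterestCollectionIE._real_extract` above pages through a board with a `bookmarks` cursor, re-requesting the feed until the response stops returning a bookmark. The same loop in isolation, as a generator (fetch_page is a hypothetical stand-in for the API call):

def iter_board_items(fetch_page):
    # fetch_page(bookmark) is a hypothetical callable returning a feed
    # dict shaped like {'data': [...], 'bookmark': '...' or None}
    bookmark = None
    while True:
        feed = fetch_page(bookmark)
        yield from feed.get('data') or []
        bookmark = feed.get('bookmark')
        if not bookmark:
            break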
yt-dlp-2024.09.27/yt_dlp/extractor/pixivsketch.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    traverse_obj,
    unified_timestamp,
)


class PixivSketchBaseIE(InfoExtractor):
    def _call_api(self, video_id, path, referer, note='Downloading JSON metadata'):
        response = self._download_json(f'https://sketch.pixiv.net/api/{path}', video_id, note=note, headers={
            'Referer': referer,
            'X-Requested-With': referer,
        })
        errors = traverse_obj(response, ('errors', ..., 'message'))
        if errors:
            raise ExtractorError(' '.join(f'{e}.' for e in errors))
        return response.get('data') or {}


class PixivSketchIE(PixivSketchBaseIE):
    IE_NAME = 'pixiv:sketch'
    _VALID_URL = r'https?://sketch\.pixiv\.net/@(?P<uploader_id>[a-zA-Z0-9_-]+)/lives/(?P<id>\d+)/?'
    _TESTS = [{
        'url': 'https://sketch.pixiv.net/@nuhutya/lives/3654620468641830507',
        'info_dict': {
            'id': '7370666691623196569',
            'title': 'まにあえクリスマス!',
            'uploader': 'ぬふちゃ',
            'uploader_id': 'nuhutya',
            'channel_id': '9844815',
            'age_limit': 0,
            'timestamp': 1640351536,
        },
        'skip': True,
    }, {
        # these two (age_limit > 0) requires you to login on website, but it's actually not required for download
        'url': 'https://sketch.pixiv.net/@namahyou/lives/4393103321546851377',
        'info_dict': {
            'id': '4907995960957946943',
            'title': 'クリスマスなんて知らん🖕',
            'uploader': 'すゃもり',
            'uploader_id': 'suya2mori2',
            'channel_id': '31169300',
            'age_limit': 15,
            'timestamp': 1640347640,
        },
        'skip': True,
    }, {
        'url': 'https://sketch.pixiv.net/@8aki/lives/3553803162487249670',
        'info_dict': {
            'id': '1593420639479156945',
            'title': 'おまけ本作業(リョナ有)',
            'uploader': 'おぶい / Obui',
            'uploader_id': 'oving',
            'channel_id': '17606',
            'age_limit': 18,
            'timestamp': 1640330263,
        },
        'skip': True,
    }]

    def _real_extract(self, url):
        video_id, uploader_id = self._match_valid_url(url).group('id', 'uploader_id')
        data = self._call_api(video_id, f'lives/{video_id}.json', url)

        if not traverse_obj(data, 'is_broadcasting'):
            raise ExtractorError(f'This live is offline. Use https://sketch.pixiv.net/@{uploader_id} for ongoing live.', expected=True)

        m3u8_url = traverse_obj(data, ('owner', 'hls_movie', 'url'))
        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, ext='mp4',
            entry_protocol='m3u8_native', m3u8_id='hls')

        return {
            'id': video_id,
            'title': data.get('name'),
            'formats': formats,
            'uploader': traverse_obj(data, ('user', 'name'), ('owner', 'user', 'name')),
            'uploader_id': traverse_obj(data, ('user', 'unique_name'), ('owner', 'user', 'unique_name')),
            'channel_id': str(traverse_obj(data, ('user', 'pixiv_user_id'), ('owner', 'user', 'pixiv_user_id'))),
            'age_limit': 18 if data.get('is_r18') else 15 if data.get('is_r15') else 0,
            'timestamp': unified_timestamp(data.get('created_at')),
            'is_live': True,
        }


class PixivSketchUserIE(PixivSketchBaseIE):
    IE_NAME = 'pixiv:sketch:user'
    _VALID_URL = r'https?://sketch\.pixiv\.net/@(?P<id>[a-zA-Z0-9_-]+)/?'
    _TESTS = [{
        'url': 'https://sketch.pixiv.net/@nuhutya',
        'only_matching': True,
    }, {
        'url': 'https://sketch.pixiv.net/@namahyou',
        'only_matching': True,
    }, {
        'url': 'https://sketch.pixiv.net/@8aki',
        'only_matching': True,
    }]

    @classmethod
    def suitable(cls, url):
        return super().suitable(url) and not PixivSketchIE.suitable(url)

    def _real_extract(self, url):
        user_id = self._match_id(url)
        data = self._call_api(user_id, f'lives/users/@{user_id}.json', url)

        if not traverse_obj(data, 'is_broadcasting'):
            try:
                self._call_api(user_id, 'users/current.json', url, 'Investigating reason for request failure')
            except ExtractorError as ex:
                if ex.cause and ex.cause.code == 401:
                    self.raise_login_required(f'Please log in, or use direct link like https://sketch.pixiv.net/@{user_id}/1234567890', method='cookies')
            raise ExtractorError('This user is offline', expected=True)

        return self.url_result(f'https://sketch.pixiv.net/@{user_id}/lives/{data["id"]}')
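Extractors like the ones above are normally driven through yt-dlp's public Python API rather than instantiated directly. A minimal usage sketch (the URL is illustrative; PixivSketchIE additionally requires an ongoing live):

import yt_dlp

with yt_dlp.YoutubeDL({'skip_download': True}) as ydl:
    # extract_info with download=False returns the info dict only
    info = ydl.extract_info(
        'https://sketch.pixiv.net/@nuhutya/lives/3654620468641830507', download=False)
    print(info.get('title'), info.get('uploader'))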
yt-dlp-2024.09.27/yt_dlp/extractor/pladform.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    determine_ext,
    int_or_none,
    parse_qs,
    qualities,
    xpath_text,
)


class PladformIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                https?://
                    (?:
                        (?:
                            out\.pladform\.ru/player|
                            static\.pladform\.ru/player\.swf
                        )
                        \?.*\bvideoid=|
                        video\.pladform\.ru/catalog/video/videoid/
                    )
                    (?P<id>\d+)
                '''
    _EMBED_REGEX = [r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//out\.pladform\.ru/player\?.+?)\1']
    _TESTS = [{
        'url': 'http://out.pladform.ru/player?pl=18079&type=html5&videoid=100231282',
        'info_dict': {
            'id': '6216d548e755edae6e8280667d774791',
            'ext': 'mp4',
            'timestamp': 1406117012,
            'title': 'Гарик Мартиросян и Гарик Харламов - Кастинг на концерт ко Дню милиции',
            'age_limit': 0,
            'upload_date': '20140723',
            'thumbnail': str,
            'view_count': int,
            'description': str,
            'uploader_id': '12082',
            'uploader': 'Comedy Club',
            'duration': 367,
        },
        'expected_warnings': ['HTTP Error 404: Not Found'],
    }, {
        'url': 'https://out.pladform.ru/player?pl=64471&videoid=3777899&vk_puid15=0&vk_puid34=0',
        'md5': '53362fac3a27352da20fa2803cc5cd6f',
        'info_dict': {
            'id': '3777899',
            'ext': 'mp4',
            'title': 'СТУДИЯ СОЮЗ • Шоу Студия Союз, 24 выпуск (01.02.2018) Нурлан Сабуров и Слава Комиссаренко',
            'description': 'md5:05140e8bf1b7e2d46e7ba140be57fd95',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 3190,
        },
    }, {
        'url': 'http://static.pladform.ru/player.swf?pl=21469&videoid=100183293&vkcid=0',
        'only_matching': True,
    }, {
        'url': 'http://video.pladform.ru/catalog/video/videoid/100183293/vkcid/0',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        qs = parse_qs(url)
        pl = qs.get('pl', ['1'])[0]

        video = self._download_xml(
            'http://out.pladform.ru/getVideo', video_id, query={
                'pl': pl,
                'videoid': video_id,
            }, fatal=False)

        def fail(text):
            raise ExtractorError(
                f'{self.IE_NAME} returned error: {text}',
                expected=True)

        if not video:
            target_url = self._request_webpage(url, video_id, note='Resolving final URL').url
            if target_url == url:
                raise ExtractorError('Can\'t parse page')
            return self.url_result(target_url)

        if video.tag == 'error':
            fail(video.text)

        quality = qualities(('ld', 'sd', 'hd'))

        formats = []
        for src in video.findall('./src'):
            if src is None:
                continue
            format_url = src.text
            if not format_url:
                continue
            if src.get('type') == 'hls' or determine_ext(format_url) == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
            else:
                formats.append({
                    'url': src.text,
                    'format_id': src.get('quality'),
                    'quality': quality(src.get('quality')),
                })

        if not formats:
            error = xpath_text(video, './cap', 'error', default=None)
            if error:
                fail(error)

        webpage = self._download_webpage(
            f'http://video.pladform.ru/catalog/video/videoid/{video_id}',
            video_id)

        title = self._og_search_title(webpage, fatal=False) or xpath_text(
            video, './/title', 'title', fatal=True)

        description = self._search_regex(
            r'</h3>\s*<p>([^<]+)</p>', webpage, 'description', fatal=False)

        thumbnail = self._og_search_thumbnail(webpage) or xpath_text(
            video, './/cover', 'cover')

        duration = int_or_none(xpath_text(video, './/time', 'duration'))
        age_limit = int_or_none(xpath_text(video, './/age18', 'age limit'))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'age_limit': age_limit,
            'formats': formats,
        }
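PladformIE ranks its sources with yt-dlp's `qualities` helper, which maps a preference tuple onto sortable integers (higher is better, unknown labels sort last). A small sketch of how it behaves:

from yt_dlp.utils import qualities

quality = qualities(('ld', 'sd', 'hd'))
assert quality('ld') == 0
assert quality('hd') == 2
assert quality('unknown') == -1  # labels outside the tuple rank below all known ones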
yt-dlp-2024.09.27/yt_dlp/extractor/planetmarathi.py

from .common import InfoExtractor
from ..utils import (
    try_get,
    unified_strdate,
)


class PlanetMarathiIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?planetmarathi\.com/titles/(?P<id>[^/#&?$]+)'
    _TESTS = [{
        'url': 'https://www.planetmarathi.com/titles/ek-unad-divas',
        'playlist_mincount': 2,
        'info_dict': {
            'id': 'ek-unad-divas',
        },
        'playlist': [{
            'info_dict': {
                'id': 'ASSETS-MOVIE-ASSET-01_ek-unad-divas',
                'ext': 'mp4',
                'title': 'ek unad divas',
                'alt_title': 'चित्रपट',
                'description': 'md5:41c7ed6b041c2fea9820a3f3125bd881',
                'episode_number': 1,
                'duration': 5539,
                'upload_date': '20210829',
            },
        }],  # Trailer skipped
    }, {
        'url': 'https://www.planetmarathi.com/titles/baap-beep-baap-season-1',
        'playlist_mincount': 10,
        'info_dict': {
            'id': 'baap-beep-baap-season-1',
        },
        'playlist': [{
            'info_dict': {
                'id': 'ASSETS-CHARACTER-PROFILE-SEASON-01-ASSET-01_baap-beep-baap-season-1',
                'ext': 'mp4',
                'title': 'Manohar Kanhere',
                'alt_title': 'मनोहर कान्हेरे',
                'description': 'md5:285ed45d5c0ab5522cac9a043354ebc6',
                'season_number': 1,
                'episode_number': 1,
                'duration': 29,
                'upload_date': '20210829',
            },
        }],  # Trailers, Episodes, other Character profiles skipped
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        entries = []
        json_data = self._download_json(
            f'https://www.planetmarathi.com/api/v1/titles/{playlist_id}/assets', playlist_id)['assets']
        for asset in json_data:
            asset_title = asset['mediaAssetName']['en']
            if asset_title == 'Movie':
                asset_title = playlist_id.replace('-', ' ')
            asset_id = f'{asset["sk"]}_{playlist_id}'.replace('#', '-')
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(asset['mediaAssetURL'], asset_id)
            entries.append({
                'id': asset_id,
                'title': asset_title,
                'alt_title': try_get(asset, lambda x: x['mediaAssetName']['mr']),
                'description': try_get(asset, lambda x: x['mediaAssetDescription']['en']),
                'season_number': asset.get('mediaAssetSeason'),
                'episode_number': asset.get('mediaAssetIndexForAssetType'),
                'duration': asset.get('mediaAssetDurationInSeconds'),
                'upload_date': unified_strdate(asset.get('created')),
                'formats': formats,
                'subtitles': subtitles,
            })
        return self.playlist_result(entries, playlist_id=playlist_id)

yt-dlp-2024.09.27/yt_dlp/extractor/platzi.py

import base64

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    clean_html,
    int_or_none,
    str_or_none,
    try_get,
    url_or_none,
    urlencode_postdata,
    urljoin,
)


class PlatziBaseIE(InfoExtractor):
    _LOGIN_URL = 'https://platzi.com/login/'
    _NETRC_MACHINE = 'platzi'

    def _perform_login(self, username, password):
        login_page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'email': username,
            'password': password,
        })

        urlh = self._request_webpage(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(login_form),
            headers={'Referer': self._LOGIN_URL})

        # login succeeded
        if 'platzi.com/login' not in urlh.url:
            return

        login_error = self._webpage_read_content(
            urlh, self._LOGIN_URL, None, 'Downloading login error page')

        login = self._parse_json(
            self._search_regex(
                r'login\s*=\s*({.+?})(?:\s*;|\s*</script)', login_error, 'login'),
            None)

        for kind in ('error', 'password', 'nonFields'):
            error = str_or_none(login.get(f'{kind}Error'))
            if error:
                raise ExtractorError(
                    f'Unable to login: {error}', expected=True)

        raise ExtractorError('Unable to log in')


class PlatziIE(PlatziBaseIE):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            platzi\.com/clases|           # es version
                            courses\.platzi\.com/classes  # en version
                        )/[^/]+/(?P<id>\d+)-[^/?\#&]+
                    '''

    _TESTS = [{
        'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
        'md5': '8f56448241005b561c10f11a595b37e3',
        'info_dict': {
            'id': '12074',
            'ext': 'mp4',
            'title': 'Creando nuestra primera página',
            'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
            'duration': 420,
        },
        'skip': 'Requires platzi account credentials',
    }, {
        'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
        'info_dict': {
            'id': '13430',
            'ext': 'mp4',
            'title': 'Background',
            'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
            'duration': 360,
        },
        'skip': 'Requires platzi account credentials',
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        lecture_id = self._match_id(url)

        webpage = self._download_webpage(url, lecture_id)

        data = self._parse_json(
            self._search_regex(
                # client_data may contain "};" so that we have to try more
                # strict regex first
                (r'client_data\s*=\s*({.+?})\s*;\s*\n',
                 r'client_data\s*=\s*({.+?})\s*;'),
                webpage, 'client data'),
            lecture_id)

        material = data['initialState']['material']
        desc = material['description']
        title = desc['title']

        formats = []
        for server_id, server in material['videos'].items():
            if not isinstance(server, dict):
                continue
            for format_id in ('hls', 'dash'):
                format_url = url_or_none(server.get(format_id))
                if not format_url:
                    continue
                if format_id == 'hls':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, lecture_id, 'mp4',
                        entry_protocol='m3u8_native', m3u8_id=format_id,
                        note=f'Downloading {server_id} m3u8 information',
                        fatal=False))
                elif format_id == 'dash':
                    formats.extend(self._extract_mpd_formats(
                        format_url, lecture_id, mpd_id=format_id,
                        note=f'Downloading {server_id} MPD manifest',
                        fatal=False))

        content = str_or_none(desc.get('content'))
        description = (clean_html(base64.b64decode(content).decode('utf-8'))
                       if content else None)
        duration = int_or_none(material.get('duration'), invscale=60)

        return {
            'id': lecture_id,
            'title': title,
            'description': description,
            'duration': duration,
            'formats': formats,
        }


class PlatziCourseIE(PlatziBaseIE):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:
                            platzi\.com/clases|           # es version
                            courses\.platzi\.com/classes  # en version
                        )/(?P<id>[^/?\#&]+)
                    '''

    _TESTS = [{
        'url': 'https://platzi.com/clases/next-js/',
        'info_dict': {
            'id': '1311',
            'title': 'Curso de Next.js',
        },
        'playlist_count': 22,
    }, {
        'url': 'https://courses.platzi.com/classes/communication-codestream/',
        'info_dict': {
            'id': '1367',
            'title': 'Codestream Course',
        },
        'playlist_count': 14,
    }]

    @classmethod
    def suitable(cls, url):
        return False if PlatziIE.suitable(url) else super().suitable(url)

    def _real_extract(self, url):
        course_name = self._match_id(url)

        webpage = self._download_webpage(url, course_name)

        props = self._parse_json(
            self._search_regex(r'data\s*=\s*({.+?})\s*;', webpage, 'data'),
            course_name)['initialProps']

        entries = []
        for chapter_num, chapter in enumerate(props['concepts'], 1):
            if not isinstance(chapter, dict):
                continue
            materials = chapter.get('materials')
            if not materials or not isinstance(materials, list):
                continue
            chapter_title = chapter.get('title')
            chapter_id = str_or_none(chapter.get('id'))
            for material in materials:
                if not isinstance(material, dict):
                    continue
                if material.get('material_type') != 'video':
                    continue
                video_url = urljoin(url, material.get('url'))
                if not video_url:
                    continue
                entries.append({
                    '_type': 'url_transparent',
                    'url': video_url,
                    'title': str_or_none(material.get('name')),
                    'id': str_or_none(material.get('id')),
                    'ie_key': PlatziIE.ie_key(),
                    'chapter': chapter_title,
                    'chapter_number': chapter_num,
                    'chapter_id': chapter_id,
                })

        course_id = str(try_get(props, lambda x: x['course']['id']))
        course_title = try_get(props, lambda x: x['course']['name'], str)

        return self.playlist_result(entries, course_id, course_title)
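PlatziIE stores the lecture description base64-encoded in the page state and decodes it with `base64.b64decode` before stripping markup via yt-dlp's `clean_html`. The decode step in isolation (the payload below is a synthetic round trip, not a real page value):

import base64

from yt_dlp.utils import clean_html

# Stand-in for desc['content'] as found in the page state
content = base64.b64encode('<p>Hola <b>mundo</b></p>'.encode()).decode()
description = clean_html(base64.b64decode(content).decode('utf-8'))
print(description)  # HTML tags stripped, text preserved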
yt-dlp-2024.09.27/yt_dlp/extractor/playplustv.py

import json

from .common import InfoExtractor
from ..networking import PUTRequest
from ..networking.exceptions import HTTPError
from ..utils import ExtractorError, clean_html, int_or_none


class PlayPlusTVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?playplus\.(?:com|tv)/VOD/(?P<project_id>[0-9]+)/(?P<id>[0-9a-f]{32})'
    _TEST = {
        'url': 'https://www.playplus.tv/VOD/7572/db8d274a5163424e967f35a30ddafb8e',
        'md5': 'd078cb89d7ab6b9df37ce23c647aef72',
        'info_dict': {
            'id': 'db8d274a5163424e967f35a30ddafb8e',
            'ext': 'mp4',
            'title': 'Capítulo 179 - Final',
            'description': 'md5:01085d62d8033a1e34121d3c3cabc838',
            'timestamp': 1529992740,
            'upload_date': '20180626',
        },
        'skip': 'Requires account credential',
    }
    _NETRC_MACHINE = 'playplustv'
    _GEO_COUNTRIES = ['BR']
    _token = None
    _profile_id = None

    def _call_api(self, resource, video_id=None, query=None):
        return self._download_json('https://api.playplus.tv/api/media/v2/get' + resource, video_id, headers={
            'Authorization': 'Bearer ' + self._token,
        }, query=query)

    def _perform_login(self, username, password):
        req = PUTRequest(
            'https://api.playplus.tv/api/web/login', json.dumps({
                'email': username,
                'password': password,
            }).encode(), {
                'Content-Type': 'application/json; charset=utf-8',
            })

        try:
            self._token = self._download_json(req, None)['token']
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                raise ExtractorError(self._parse_json(
                    e.cause.response.read(), None)['errorMessage'],
                    expected=True)
            raise

        self._profile = self._call_api('Profiles')['list'][0]['_id']

    def _real_initialize(self):
        if not self._token:
            self.raise_login_required(method='password')

    def _real_extract(self, url):
        project_id, media_id = self._match_valid_url(url).groups()
        media = self._call_api(
            'Media', media_id, {
                'profileId': self._profile,
                'projectId': project_id,
                'mediaId': media_id,
            })['obj']
        title = media['title']

        formats = []
        for f in media.get('files', []):
            f_url = f.get('url')
            if not f_url:
                continue
            file_info = f.get('fileInfo') or {}
            formats.append({
                'url': f_url,
                'width': int_or_none(file_info.get('width')),
                'height': int_or_none(file_info.get('height')),
            })

        thumbnails = []
        for thumb in media.get('thumbs', []):
            thumb_url = thumb.get('url')
            if not thumb_url:
                continue
            thumbnails.append({
                'url': thumb_url,
                'width': int_or_none(thumb.get('width')),
                'height': int_or_none(thumb.get('height')),
            })

        return {
            'id': media_id,
            'title': title,
            'formats': formats,
            'thumbnails': thumbnails,
            'description': clean_html(media.get('description')) or media.get('shortDescription'),
            'timestamp': int_or_none(media.get('publishDate'), 1000),
            'view_count': int_or_none(media.get('numberOfViews')),
            'comment_count': int_or_none(media.get('numberOfComments')),
            'tags': media.get('tags'),
        }
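The login helper above issues a JSON body over HTTP PUT via yt-dlp's `PUTRequest`. The equivalent request built with only the standard library, for reference (credentials are placeholders; the request is constructed but deliberately not sent):

import json
import urllib.request

body = json.dumps({'email': 'user@example.com', 'password': 'hunter2'}).encode()
req = urllib.request.Request(
    'https://api.playplus.tv/api/web/login', data=body, method='PUT',
    headers={'Content-Type': 'application/json; charset=utf-8'})
# urllib.request.urlopen(req) would return a JSON body containing 'token' on success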
yt-dlp-2024.09.27/yt_dlp/extractor/playsuisse.py

import json

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_qs,
    traverse_obj,
    update_url_query,
    urlencode_postdata,
)


class PlaySuisseIE(InfoExtractor):
    _NETRC_MACHINE = 'playsuisse'
    _VALID_URL = r'https?://(?:www\.)?playsuisse\.ch/(?:watch|detail)/(?:[^#]*[?&]episodeId=)?(?P<id>[0-9]+)'
    _TESTS = [
        {
            # Old URL
            'url': 'https://www.playsuisse.ch/watch/763211/0',
            'only_matching': True,
        },
        {
            # episode in a series
            'url': 'https://www.playsuisse.ch/watch/763182?episodeId=763211',
            'md5': '82df2a470b2dfa60c2d33772a8a60cf8',
            'info_dict': {
                'id': '763211',
                'ext': 'mp4',
                'title': 'Knochen',
                'description': 'md5:8ea7a8076ba000cd9e8bc132fd0afdd8',
                'duration': 3344,
                'series': 'Wilder',
                'season': 'Season 1',
                'season_number': 1,
                'episode': 'Knochen',
                'episode_number': 1,
                'thumbnail': 're:https://playsuisse-img.akamaized.net/',
            },
        },
        {
            # film
            'url': 'https://www.playsuisse.ch/watch/808675',
            'md5': '818b94c1d2d7c4beef953f12cb8f3e75',
            'info_dict': {
                'id': '808675',
                'ext': 'mp4',
                'title': 'Der Läufer',
                'description': 'md5:9f61265c7e6dcc3e046137a792b275fd',
                'duration': 5280,
                'thumbnail': 're:https://playsuisse-img.akamaized.net/',
            },
        },
        {
            # series (treated as a playlist)
            'url': 'https://www.playsuisse.ch/detail/1115687',
            'info_dict': {
                'description': 'md5:e4a2ae29a8895823045b5c3145a02aa3',
                'id': '1115687',
                'series': 'They all came out to Montreux',
                'title': 'They all came out to Montreux',
            },
            'playlist': [{
                'info_dict': {
                    'description': 'md5:f2462744834b959a31adc6292380cda2',
                    'duration': 3180,
                    'episode': 'Folge 1',
                    'episode_number': 1,
                    'id': '1112663',
                    'season': 'Season 1',
                    'season_number': 1,
                    'series': 'They all came out to Montreux',
                    'thumbnail': 're:https://playsuisse-img.akamaized.net/',
                    'title': 'Folge 1',
                    'ext': 'mp4',
                },
            }, {
                'info_dict': {
                    'description': 'md5:9dfd308699fe850d3bce12dc1bad9b27',
                    'duration': 2935,
                    'episode': 'Folge 2',
                    'episode_number': 2,
                    'id': '1112661',
                    'season': 'Season 1',
                    'season_number': 1,
                    'series': 'They all came out to Montreux',
                    'thumbnail': 're:https://playsuisse-img.akamaized.net/',
                    'title': 'Folge 2',
                    'ext': 'mp4',
                },
            }, {
                'info_dict': {
                    'description': 'md5:14a93a3356b2492a8f786ab2227ef602',
                    'duration': 2994,
                    'episode': 'Folge 3',
                    'episode_number': 3,
                    'id': '1112664',
                    'season': 'Season 1',
                    'season_number': 1,
                    'series': 'They all came out to Montreux',
                    'thumbnail': 're:https://playsuisse-img.akamaized.net/',
                    'title': 'Folge 3',
                    'ext': 'mp4',
                },
            }],
        },
    ]

    _GRAPHQL_QUERY = '''
        query AssetWatch($assetId: ID!) {
            assetV2(id: $assetId) {
                ...Asset
                episodes {
                    ...Asset
                }
            }
        }
        fragment Asset on AssetV2 {
            id
            name
            description
            duration
            episodeNumber
            seasonNumber
            seriesName
            medias {
                type
                url
            }
            thumbnail16x9 {
                ...ImageDetails
            }
            thumbnail2x3 {
                ...ImageDetails
            }
            thumbnail16x9WithTitle {
                ...ImageDetails
            }
            thumbnail2x3WithTitle {
                ...ImageDetails
            }
        }
        fragment ImageDetails on AssetImage {
            id
            url
        }'''
    _LOGIN_BASE_URL = 'https://login.srgssr.ch/srgssrlogin.onmicrosoft.com'
    _LOGIN_PATH = 'B2C_1A__SignInV2'
    _ID_TOKEN = None

    def _perform_login(self, username, password):
        login_page = self._download_webpage(
            'https://www.playsuisse.ch/api/sso/login', None, note='Downloading login page',
            query={'x': 'x', 'locale': 'de', 'redirectUrl': 'https://www.playsuisse.ch/'})
        settings = self._search_json(r'var\s+SETTINGS\s*=', login_page, 'settings', None)

        csrf_token = settings['csrf']
        query = {'tx': settings['transId'], 'p': self._LOGIN_PATH}

        status = traverse_obj(self._download_json(
            f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/SelfAsserted', None, 'Logging in',
            query=query, headers={'X-CSRF-TOKEN': csrf_token}, data=urlencode_postdata({
                'request_type': 'RESPONSE',
                'signInName': username,
                'password': password,
            }), expected_status=400), ('status', {int_or_none}))
        if status == 400:
            raise ExtractorError('Invalid username or password', expected=True)

        urlh = self._request_webpage(
            f'{self._LOGIN_BASE_URL}/{self._LOGIN_PATH}/api/CombinedSigninAndSignup/confirmed',
            None, 'Downloading ID token', query={
                'rememberMe': 'false',
                'csrf_token': csrf_token,
                **query,
                'diags': '',
            })

        self._ID_TOKEN = traverse_obj(parse_qs(urlh.url), ('id_token', 0))
        if not self._ID_TOKEN:
            raise ExtractorError('Login failed')

    def _get_media_data(self, media_id):
        # NOTE In the web app, the "locale" header is used to switch between languages,
        # However this doesn't seem to take effect when passing the header here.
        response = self._download_json(
            'https://www.playsuisse.ch/api/graphql',
            media_id, data=json.dumps({
                'operationName': 'AssetWatch',
                'query': self._GRAPHQL_QUERY,
                'variables': {'assetId': media_id},
            }).encode(),
            headers={'Content-Type': 'application/json', 'locale': 'de'})

        return response['data']['assetV2']

    def _real_extract(self, url):
        if not self._ID_TOKEN:
            self.raise_login_required(method='password')

        media_id = self._match_id(url)
        media_data = self._get_media_data(media_id)
        info = self._extract_single(media_data)
        if media_data.get('episodes'):
            info.update({
                '_type': 'playlist',
                'entries': map(self._extract_single, media_data['episodes']),
            })
        return info

    def _extract_single(self, media_data):
        thumbnails = traverse_obj(media_data, lambda k, _: k.startswith('thumbnail'))

        formats, subtitles = [], {}
        for media in traverse_obj(media_data, 'medias', default=[]):
            if not media.get('url') or media.get('type') != 'HLS':
                continue
            f, subs = self._extract_m3u8_formats_and_subtitles(
                update_url_query(media['url'], {'id_token': self._ID_TOKEN}),
                media_data['id'], 'mp4', m3u8_id='HLS', fatal=False)
            formats.extend(f)
            self._merge_subtitles(subs, target=subtitles)

        return {
            'id': media_data['id'],
            'title': media_data.get('name'),
            'description': media_data.get('description'),
            'thumbnails': thumbnails,
            'duration': int_or_none(media_data.get('duration')),
            'formats': formats,
            'subtitles': subtitles,
            'series': media_data.get('seriesName'),
            'season_number': int_or_none(media_data.get('seasonNumber')),
            'episode': media_data.get('name') if media_data.get('episodeNumber') else None,
            'episode_number': int_or_none(media_data.get('episodeNumber')),
        }
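`_extract_single` above authenticates each HLS manifest URL by appending the login `id_token` with yt-dlp's `update_url_query`, which merges new parameters into an existing query string. A small sketch:

from yt_dlp.utils import update_url_query

url = update_url_query('https://example.com/master.m3u8?foo=1', {'id_token': 'TOKEN'})
print(url)  # -> 'https://example.com/master.m3u8?foo=1&id_token=TOKEN'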
yt-dlp-2024.09.27/yt_dlp/extractor/playtvak.py

import urllib.parse

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_iso8601,
    qualities,
)


class PlaytvakIE(InfoExtractor):
    IE_DESC = 'Playtvak.cz, iDNES.cz and Lidovky.cz'
    _VALID_URL = r'https?://(?:.+?\.)?(?:playtvak|idnes|lidovky|metro)\.cz/.*\?(?:c|idvideo)=(?P<id>[^&]+)'
    _TESTS = [{
        'url': 'http://www.playtvak.cz/vyzente-vosy-a-srsne-ze-zahrady-dn5-/hodinovy-manzel.aspx?c=A150730_150323_hodinovy-manzel_kuko',
        'md5': '4525ae312c324b4be2f4603cc78ceb4a',
        'info_dict': {
            'id': 'A150730_150323_hodinovy-manzel_kuko',
            'ext': 'mp4',
            'title': 'Vyžeňte vosy a sršně ze zahrady',
            'description': 'md5:4436e61b7df227a093778efb7e373571',
            'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
            'duration': 279,
            'timestamp': 1438732860,
            'upload_date': '20150805',
            'is_live': False,
        },
    }, {  # live video test
        'url': 'http://slowtv.playtvak.cz/planespotting-0pr-/planespotting.aspx?c=A150624_164934_planespotting_cat',
        'info_dict': {
            'id': 'A150624_164934_planespotting_cat',
            'ext': 'flv',
            'title': 're:^Planespotting [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'description': 'Sledujte provoz na ranveji Letiště Václava Havla v Praze',
            'is_live': True,
        },
        'params': {
            'skip_download': True,  # requires rtmpdump
        },
    }, {  # another live stream, this one without Misc.videoFLV
        'url': 'https://slowtv.playtvak.cz/zive-sledujte-vlaky-v-primem-prenosu-dwi-/hlavni-nadrazi.aspx?c=A151218_145728_hlavni-nadrazi_plap',
        'info_dict': {
            'id': 'A151218_145728_hlavni-nadrazi_plap',
            'ext': 'flv',
            'title': 're:^Hlavní nádraží [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
        },
        'params': {
            'skip_download': True,  # requires rtmpdump
        },
    }, {  # idnes.cz
        'url': 'http://zpravy.idnes.cz/pes-zavreny-v-aute-rozbijeni-okynek-v-aute-fj5-/domaci.aspx?c=A150809_104116_domaci_pku',
        'md5': '819832ba33cd7016e58a6658577fe289',
        'info_dict': {
            'id': 'A150809_104116_domaci_pku',
            'ext': 'mp4',
            'title': 'Zavřeli jsme mraženou pizzu do auta. Upekla se',
            'description': 'md5:01e73f02329e2e5760bd5eed4d42e3c2',
            'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
            'duration': 39,
            'timestamp': 1438969140,
            'upload_date': '20150807',
            'is_live': False,
        },
    }, {  # lidovky.cz
        'url': 'http://www.lidovky.cz/dalsi-demonstrace-v-praze-o-migraci-duq-/video.aspx?c=A150808_214044_ln-video_ELE',
        'md5': 'c7209ac4ba9d234d4ad5bab7485bcee8',
        'info_dict': {
            'id': 'A150808_214044_ln-video_ELE',
            'ext': 'mp4',
            'title': 'Táhni! Demonstrace proti imigrantům budila emoce',
            'description': 'md5:97c81d589a9491fbfa323c9fa3cca72c',
            'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
            'timestamp': 1439052180,
            'upload_date': '20150808',
            'is_live': False,
        },
    }, {  # metro.cz
        'url': 'http://www.metro.cz/video-pod-billboardem-se-na-vltavske-roztocil-kolotoc-deti-vozil-jen-par-hodin-1hx-/metro-extra.aspx?c=A141111_173251_metro-extra_row',
        'md5': '84fc1deedcac37b7d4a6ccae7c716668',
        'info_dict': {
            'id': 'A141111_173251_metro-extra_row',
            'ext': 'mp4',
            'title': 'Recesisté udělali z billboardu kolotoč',
            'description': 'md5:7369926049588c3989a66c9c1a043c4c',
            'thumbnail': r're:(?i)^https?://.*\.(?:jpg|png)$',
            'timestamp': 1415725500,
            'upload_date': '20141111',
            'is_live': False,
        },
    }, {
        'url': 'http://www.playtvak.cz/embed.aspx?idvideo=V150729_141549_play-porad_kuko',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        info_url = self._html_search_regex(
            r'Misc\.video(?:FLV)?\(\s*{\s*data\s*:\s*"([^"]+)"', webpage, 'info url')

        parsed_url = urllib.parse.urlparse(info_url)

        qs = urllib.parse.parse_qs(parsed_url.query)
        qs.update({
            'reklama': ['0'],
            'type': ['js'],
        })

        info_url = urllib.parse.urlunparse(
            parsed_url._replace(query=urllib.parse.urlencode(qs, True)))

        json_info = self._download_json(
            info_url, video_id,
            transform_source=lambda s: s[s.index('{'):s.rindex('}') + 1])

        item = None
        for i in json_info['items']:
            if i.get('type') == 'video' or i.get('type') == 'stream':
                item = i
                break
        if not item:
            raise ExtractorError('No suitable stream found')

        quality = qualities(('low', 'middle', 'high'))

        formats = []
        for fmt in item['video']:
            video_url = fmt.get('file')
            if not video_url:
                continue

            format_ = fmt['format']
            format_id = '{}_{}'.format(format_, fmt['quality'])
            preference = None

            if format_ in ('mp4', 'webm'):
                ext = format_
            elif format_ == 'rtmp':
                ext = 'flv'
            elif format_ == 'apple':
                ext = 'mp4'
                # Some streams have mp3 audio which does not play
                # well with ffmpeg filter aac_adtstoasc
                preference = -10
            elif format_ == 'adobe':  # f4m manifest fails with 404 in 80% of requests
                continue
            else:  # Other formats not supported yet
                continue

            formats.append({
                'url': video_url,
                'ext': ext,
                'format_id': format_id,
                'quality': quality(fmt.get('quality')),
                'preference': preference,
            })

        title = item['title']
        is_live = item['type'] == 'stream'
        description = self._og_search_description(webpage, default=None) or self._html_search_meta(
            'description', webpage, 'description', default=None)
        timestamp = None
        duration = None
        if not is_live:
            duration = int_or_none(item.get('length'))
            timestamp = item.get('published')
            if timestamp:
                timestamp = parse_iso8601(timestamp[:-5])

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': item.get('image'),
            'duration': duration,
            'timestamp': timestamp,
            'is_live': is_live,
            'formats': formats,
        }

yt-dlp-2024.09.27/yt_dlp/extractor/playwire.py

from .common import InfoExtractor
from ..utils import (
    dict_get,
    float_or_none,
)


class PlaywireIE(InfoExtractor):
    _VALID_URL = r'https?://(?:config|cdn)\.playwire\.com(?:/v2)?/(?P<publisher_id>\d+)/(?:videos/v2|embed|config)/(?P<id>\d+)'
    _EMBED_REGEX = [r'<script[^>]+data-config=(["\'])(?P<url>(?:https?:)?//config\.playwire\.com/.+?)\1']

    _TESTS = [{
        'url': 'http://config.playwire.com/14907/videos/v2/3353705/player.json',
        'md5': 'e6398701e3595888125729eaa2329ed9',
        'info_dict': {
            'id': '3353705',
            'ext': 'mp4',
            'title': 'S04_RM_UCL_Rus',
            'thumbnail': r're:^https?://.*\.png$',
            'duration': 145.94,
        },
    }, {
        # m3u8 in f4m
        'url': 'http://config.playwire.com/21772/videos/v2/4840492/zeus.json',
        'info_dict': {
            'id': '4840492',
            'ext': 'mp4',
            'title': 'ITV EL SHOW FULL',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        # Multiple resolutions while bitrates missing
        'url': 'http://cdn.playwire.com/11625/embed/85228.html',
        'only_matching': True,
    }, {
        'url': 'http://config.playwire.com/12421/videos/v2/3389892/zeus.json',
        'only_matching': True,
    }, {
        'url': 'http://cdn.playwire.com/v2/12342/config/1532636.json',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        publisher_id, video_id = mobj.group('publisher_id'), mobj.group('id')

        player = self._download_json(
            f'http://config.playwire.com/{publisher_id}/videos/v2/{video_id}/zeus.json',
            video_id)

        title = player['settings']['title']
        duration = float_or_none(player.get('duration'), 1000)

        content = player['content']
        thumbnail = content.get('poster')
        src = content['media']['f4m']

        formats = self._extract_f4m_formats(src, video_id, m3u8_id='hls')
        for a_format in formats:
            if not dict_get(a_format, ['tbr', 'width', 'height']):
                a_format['quality'] = 1 if '-hd.' in a_format['url'] else 0

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'duration': duration,
            'formats': formats,
        }
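Both PlaywireIE and PlaytvakIE above convert millisecond values from the API into seconds with yt-dlp's `float_or_none`/`int_or_none` scale arguments. A small sketch of that conversion:

from yt_dlp.utils import float_or_none, int_or_none

assert float_or_none(145940, 1000) == 145.94   # ms -> s, None-safe
assert float_or_none(None, 1000) is None       # missing values pass through
assert int_or_none('1529992740123', 1000) == 1529992740  # epoch ms -> epoch s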
_LOGIN_URL = 'https://app.pluralsight.com/id/' _NETRC_MACHINE = 'pluralsight' _TESTS = [{ 'url': 'http://www.pluralsight.com/training/player?author=mike-mckeown&name=hosting-sql-server-windows-azure-iaas-m7-mgmt&mode=live&clip=3&course=hosting-sql-server-windows-azure-iaas', 'md5': '4d458cf5cf4c593788672419a8dd4cf8', 'info_dict': { 'id': 'hosting-sql-server-windows-azure-iaas-m7-mgmt-04', 'ext': 'mp4', 'title': 'Demo Monitoring', 'duration': 338, }, 'skip': 'Requires pluralsight account credentials', }, { 'url': 'https://app.pluralsight.com/training/player?course=angularjs-get-started&author=scott-allen&name=angularjs-get-started-m1-introduction&clip=0&mode=live', 'only_matching': True, }, { # available without pluralsight account 'url': 'http://app.pluralsight.com/training/player?author=scott-allen&name=angularjs-get-started-m1-introduction&mode=live&clip=0&course=angularjs-get-started', 'only_matching': True, }, { 'url': 'https://app.pluralsight.com/player?course=ccna-intro-networking&author=ross-bagurdes&name=ccna-intro-networking-m06&clip=0', 'only_matching': True, }] GRAPHQL_VIEWCLIP_TMPL = ''' query viewClip { viewClip(input: { author: "%(author)s", clipIndex: %(clipIndex)d, courseName: "%(courseName)s", includeCaptions: %(includeCaptions)s, locale: "%(locale)s", mediaType: "%(mediaType)s", moduleName: "%(moduleName)s", quality: "%(quality)s" }) { urls { url cdn rank source }, status } }''' def _perform_login(self, username, password): login_page = self._download_webpage( self._LOGIN_URL, None, 'Downloading login page') login_form = self._hidden_inputs(login_page) login_form.update({ 'Username': username, 'Password': password, }) post_url = self._search_regex( r'<form[^>]+action=(["\'])(?P<url>.+?)\1', login_page, 'post url', default=self._LOGIN_URL, group='url') if not post_url.startswith('http'): post_url = urllib.parse.urljoin(self._LOGIN_URL, post_url) response = self._download_webpage( post_url, None, 'Logging in', data=urlencode_postdata(login_form), headers={'Content-Type': 'application/x-www-form-urlencoded'}) error = self._search_regex( r'<span[^>]+class="field-validation-error"[^>]*>([^<]+)</span>', response, 'error message', default=None) if error: raise ExtractorError(f'Unable to login: {error}', expected=True) if all(not re.search(p, response) for p in ( r'__INITIAL_STATE__', r'["\']currentUser["\']', # new layout? r'>\s*Sign out\s*<')): BLOCKED = 'Your account has been blocked due to suspicious activity' if BLOCKED in response: raise ExtractorError( f'Unable to login: {BLOCKED}', expected=True) MUST_AGREE = 'To continue using Pluralsight, you must agree to' if any(p in response for p in (MUST_AGREE, '>Disagree<', '>Agree<')): raise ExtractorError( f'Unable to login: {MUST_AGREE} some documents. 
Go to pluralsight.com, ' 'log in and agree with what Pluralsight requires.', expected=True) raise ExtractorError('Unable to log in') def _get_subtitles(self, author, clip_idx, clip_id, lang, name, duration, video_id): captions = None if clip_id: captions = self._download_json( f'{self._API_BASE}/transcript/api/v1/caption/json/{clip_id}/{lang}', video_id, 'Downloading captions JSON', 'Unable to download captions JSON', fatal=False) if not captions: captions_post = { 'a': author, 'cn': int(clip_idx), 'lc': lang, 'm': name, } captions = self._download_json( f'{self._API_BASE}/player/retrieve-captions', video_id, 'Downloading captions JSON', 'Unable to download captions JSON', fatal=False, data=json.dumps(captions_post).encode(), headers={'Content-Type': 'application/json;charset=utf-8'}) if captions: return { lang: [{ 'ext': 'json', 'data': json.dumps(captions), }, { 'ext': 'srt', 'data': self._convert_subtitles(duration, captions), }], } @staticmethod def _convert_subtitles(duration, subs): srt = '' TIME_OFFSET_KEYS = ('displayTimeOffset', 'DisplayTimeOffset') TEXT_KEYS = ('text', 'Text') for num, current in enumerate(subs): current = subs[num] start, text = ( float_or_none(dict_get(current, TIME_OFFSET_KEYS, skip_false_values=False)), dict_get(current, TEXT_KEYS)) if start is None or text is None: continue end = duration if num == len(subs) - 1 else float_or_none( dict_get(subs[num + 1], TIME_OFFSET_KEYS, skip_false_values=False)) if end is None: continue srt += os.linesep.join( ( f'{num}', f'{srt_subtitles_timecode(start)} --> {srt_subtitles_timecode(end)}', text, os.linesep, )) return srt def _real_extract(self, url): qs = parse_qs(url) author = qs.get('author', [None])[0] name = qs.get('name', [None])[0] clip_idx = qs.get('clip', [None])[0] course_name = qs.get('course', [None])[0] if any(not f for f in (author, name, clip_idx, course_name)): raise ExtractorError('Invalid URL', expected=True) display_id = f'{name}-{clip_idx}' course = self._download_course(course_name, url, display_id) collection = course['modules'] clip = None for module_ in collection: if name in (module_.get('moduleName'), module_.get('name')): for clip_ in module_.get('clips', []): clip_index = clip_.get('clipIndex') if clip_index is None: clip_index = clip_.get('index') if clip_index is None: continue if str(clip_index) == clip_idx: clip = clip_ break if not clip: raise ExtractorError('Unable to resolve clip') title = clip['title'] clip_id = clip.get('clipName') or clip.get('name') or clip['clipId'] QUALITIES = { 'low': {'width': 640, 'height': 480}, 'medium': {'width': 848, 'height': 640}, 'high': {'width': 1024, 'height': 768}, 'high-widescreen': {'width': 1280, 'height': 720}, } QUALITIES_PREFERENCE = ('low', 'medium', 'high', 'high-widescreen') quality_key = qualities(QUALITIES_PREFERENCE) AllowedQuality = collections.namedtuple('AllowedQuality', ['ext', 'qualities']) ALLOWED_QUALITIES = ( AllowedQuality('webm', ['high']), AllowedQuality('mp4', ['low', 'medium', 'high']), ) # Some courses also offer widescreen resolution for high quality (see # https://github.com/ytdl-org/youtube-dl/issues/7766) widescreen = course.get('supportsWideScreenVideoFormats') is True best_quality = 'high-widescreen' if widescreen else 'high' if widescreen: for allowed_quality in ALLOWED_QUALITIES: allowed_quality.qualities.append(best_quality) # In order to minimize the number of calls to ViewClip API and reduce # the probability of being throttled or banned by Pluralsight we will request # only single format until formats listing was 
explicitly requested. if self.get_param('listformats', False): allowed_qualities = ALLOWED_QUALITIES else: def guess_allowed_qualities(): req_format = self.get_param('format') or 'best' req_format_split = req_format.split('-', 1) if len(req_format_split) > 1: req_ext, req_quality = req_format_split req_quality = '-'.join(req_quality.split('-')[:2]) for allowed_quality in ALLOWED_QUALITIES: if req_ext == allowed_quality.ext and req_quality in allowed_quality.qualities: return (AllowedQuality(req_ext, (req_quality, )), ) req_ext = 'webm' if self.get_param('prefer_free_formats') else 'mp4' return (AllowedQuality(req_ext, (best_quality, )), ) allowed_qualities = guess_allowed_qualities() formats = [] for ext, qualities_ in allowed_qualities: for quality in qualities_: f = QUALITIES[quality].copy() clip_post = { 'author': author, 'includeCaptions': 'false', 'clipIndex': int(clip_idx), 'courseName': course_name, 'locale': 'en', 'moduleName': name, 'mediaType': ext, 'quality': '%dx%d' % (f['width'], f['height']), } format_id = f'{ext}-{quality}' try: viewclip = self._download_json( self._GRAPHQL_EP, display_id, f'Downloading {format_id} viewclip graphql', data=json.dumps({ 'query': self.GRAPHQL_VIEWCLIP_TMPL % clip_post, 'variables': {}, }).encode(), headers=self._GRAPHQL_HEADERS)['data']['viewClip'] except ExtractorError: # Still works but most likely will go soon viewclip = self._download_json( f'{self._API_BASE}/video/clips/viewclip', display_id, f'Downloading {format_id} viewclip JSON', fatal=False, data=json.dumps(clip_post).encode(), headers={'Content-Type': 'application/json;charset=utf-8'}) # Pluralsight tracks multiple sequential calls to ViewClip API and start # to return 429 HTTP errors after some time (see # https://github.com/ytdl-org/youtube-dl/pull/6989). Moreover it may even lead # to account ban (see https://github.com/ytdl-org/youtube-dl/issues/6842). # To somewhat reduce the probability of these consequences # we will sleep random amount of time before each call to ViewClip. self._sleep( random.randint(5, 10), display_id, '%(video_id)s: Waiting for %(timeout)s seconds to avoid throttling') if not viewclip: continue clip_urls = viewclip.get('urls') if not isinstance(clip_urls, list): continue for clip_url_data in clip_urls: clip_url = clip_url_data.get('url') if not clip_url: continue cdn = clip_url_data.get('cdn') clip_f = f.copy() clip_f.update({ 'url': clip_url, 'ext': ext, 'format_id': f'{format_id}-{cdn}' if cdn else format_id, 'quality': quality_key(quality), 'source_preference': int_or_none(clip_url_data.get('rank')), }) formats.append(clip_f) duration = int_or_none( clip.get('duration')) or parse_duration(clip.get('formattedDuration')) # TODO: other languages? 
subtitles = self.extract_subtitles( author, clip_idx, clip.get('clipId'), 'en', name, duration, display_id) return { 'id': clip_id, 'title': title, 'duration': duration, 'creator': author, 'formats': formats, 'subtitles': subtitles, } class PluralsightCourseIE(PluralsightBaseIE): IE_NAME = 'pluralsight:course' _VALID_URL = r'https?://(?:(?:www|app)\.)?pluralsight\.com/(?:library/)?courses/(?P<id>[^/]+)' _TESTS = [{ # Free course from Pluralsight Starter Subscription for Microsoft TechNet # https://offers.pluralsight.com/technet?loc=zTS3z&prod=zOTprodz&tech=zOttechz&prog=zOTprogz&type=zSOz&media=zOTmediaz&country=zUSz 'url': 'http://www.pluralsight.com/courses/hosting-sql-server-windows-azure-iaas', 'info_dict': { 'id': 'hosting-sql-server-windows-azure-iaas', 'title': 'Hosting SQL Server in Microsoft Azure IaaS Fundamentals', 'description': 'md5:61b37e60f21c4b2f91dc621a977d0986', }, 'playlist_count': 31, }, { # available without pluralsight account 'url': 'https://www.pluralsight.com/courses/angularjs-get-started', 'only_matching': True, }, { 'url': 'https://app.pluralsight.com/library/courses/understanding-microsoft-azure-amazon-aws/table-of-contents', 'only_matching': True, }] def _real_extract(self, url): course_id = self._match_id(url) # TODO: PSM cookie course = self._download_course(course_id, url, course_id) title = course['title'] course_name = course['name'] course_data = course['modules'] description = course.get('description') or course.get('shortDescription') entries = [] for num, module in enumerate(course_data, 1): author = module.get('author') module_name = module.get('name') if not author or not module_name: continue for clip in module.get('clips', []): clip_index = int_or_none(clip.get('index')) if clip_index is None: continue clip_url = update_url_query( f'{self._API_BASE}/player', query={ 'mode': 'live', 'course': course_name, 'author': author, 'name': module_name, 'clip': clip_index, }) entries.append({ '_type': 'url_transparent', 'url': clip_url, 'ie_key': PluralsightIE.ie_key(), 'chapter': module.get('title'), 'chapter_number': num, 'chapter_id': module.get('moduleRef'), }) return self.playlist_result(entries, course_id, title, description) ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/plutotv.py�������������������������������������������������������0000664�0000000�0000000�00000017545�14675634471�0020764�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re import urllib.parse import uuid from .common import InfoExtractor from ..utils import ( ExtractorError, float_or_none, int_or_none, try_get, url_or_none, ) class PlutoTVIE(InfoExtractor): _WORKING = False _VALID_URL = r'''(?x) https?://(?:www\.)?pluto\.tv(?:/[^/]+)?/on-demand /(?P<video_type>movies|series) /(?P<series_or_movie_slug>[^/]+) (?: (?:/seasons?/(?P<season_no>\d+))? (?:/episode/(?P<episode_slug>[^/]+))? )? 
/?(?:$|[#?])''' _INFO_URL = 'https://service-vod.clusters.pluto.tv/v3/vod/slugs/' _INFO_QUERY_PARAMS = { 'appName': 'web', 'appVersion': 'na', 'clientID': str(uuid.uuid1()), 'clientModelNumber': 'na', 'serverSideAds': 'false', 'deviceMake': 'unknown', 'deviceModel': 'web', 'deviceType': 'web', 'deviceVersion': 'unknown', 'sid': str(uuid.uuid1()), } _TESTS = [ { 'url': 'https://pluto.tv/on-demand/series/i-love-money/season/2/episode/its-in-the-cards-2009-2-3', 'md5': 'ebcdd8ed89aaace9df37924f722fd9bd', 'info_dict': { 'id': '5de6c598e9379ae4912df0a8', 'ext': 'mp4', 'title': 'It\'s In The Cards', 'episode': 'It\'s In The Cards', 'description': 'The teams face off against each other in a 3-on-2 soccer showdown. Strategy comes into play, though, as each team gets to select their opposing teams’ two defenders.', 'series': 'I Love Money', 'season_number': 2, 'episode_number': 3, 'duration': 3600, }, }, { 'url': 'https://pluto.tv/on-demand/series/i-love-money/season/1/', 'playlist_count': 11, 'info_dict': { 'id': '5de6c582e9379ae4912dedbd', 'title': 'I Love Money - Season 1', }, }, { 'url': 'https://pluto.tv/on-demand/series/i-love-money/', 'playlist_count': 26, 'info_dict': { 'id': '5de6c582e9379ae4912dedbd', 'title': 'I Love Money', }, }, { 'url': 'https://pluto.tv/on-demand/movies/arrival-2015-1-1', 'md5': '3cead001d317a018bf856a896dee1762', 'info_dict': { 'id': '5e83ac701fa6a9001bb9df24', 'ext': 'mp4', 'title': 'Arrival', 'description': 'When mysterious spacecraft touch down across the globe, an elite team - led by expert translator Louise Banks (Academy Award® nominee Amy Adams) – races against time to decipher their intent.', 'duration': 9000, }, }, { 'url': 'https://pluto.tv/en/on-demand/series/manhunters-fugitive-task-force/seasons/1/episode/third-times-the-charm-1-1', 'only_matching': True, }, { 'url': 'https://pluto.tv/it/on-demand/series/csi-vegas/episode/legacy-2021-1-1', 'only_matching': True, }, { 'url': 'https://pluto.tv/en/on-demand/movies/attack-of-the-killer-tomatoes-1977-1-1-ptv1', 'md5': '7db56369c0da626a32d505ec6eb3f89f', 'info_dict': { 'id': '5b190c7bb0875c36c90c29c4', 'ext': 'mp4', 'title': 'Attack of the Killer Tomatoes', 'description': 'A group of scientists band together to save the world from mutated tomatoes that KILL! 
(1978)', 'duration': 5700, }, }, ] def _to_ad_free_formats(self, video_id, formats, subtitles): ad_free_formats, ad_free_subtitles, m3u8_urls = [], {}, set() for fmt in formats: res = self._download_webpage( fmt.get('url'), video_id, note='Downloading m3u8 playlist', fatal=False) if not res: continue first_segment_url = re.search( r'^(https?://.*/)0\-(end|[0-9]+)/[^/]+\.ts$', res, re.MULTILINE) if first_segment_url: m3u8_urls.add( urllib.parse.urljoin(first_segment_url.group(1), '0-end/master.m3u8')) continue first_segment_url = re.search( r'^(https?://.*/).+\-0+[0-1]0\.ts$', res, re.MULTILINE) if first_segment_url: m3u8_urls.add( urllib.parse.urljoin(first_segment_url.group(1), 'master.m3u8')) continue for m3u8_url in m3u8_urls: fmts, subs = self._extract_m3u8_formats_and_subtitles( m3u8_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) ad_free_formats.extend(fmts) ad_free_subtitles = self._merge_subtitles(ad_free_subtitles, subs) if ad_free_formats: formats, subtitles = ad_free_formats, ad_free_subtitles else: self.report_warning('Unable to find ad-free formats') return formats, subtitles def _get_video_info(self, video_json, slug, series_name=None): video_id = video_json.get('_id', slug) formats, subtitles = [], {} for video_url in try_get(video_json, lambda x: x['stitched']['urls'], list) or []: if video_url.get('type') != 'hls': continue url = url_or_none(video_url.get('url')) fmts, subs = self._extract_m3u8_formats_and_subtitles( url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False) formats.extend(fmts) subtitles = self._merge_subtitles(subtitles, subs) formats, subtitles = self._to_ad_free_formats(video_id, formats, subtitles) info = { 'id': video_id, 'formats': formats, 'subtitles': subtitles, 'title': video_json.get('name'), 'description': video_json.get('description'), 'duration': float_or_none(video_json.get('duration'), scale=1000), } if series_name: info.update({ 'series': series_name, 'episode': video_json.get('name'), 'season_number': int_or_none(video_json.get('season')), 'episode_number': int_or_none(video_json.get('number')), }) return info def _real_extract(self, url): mobj = self._match_valid_url(url).groupdict() info_slug = mobj['series_or_movie_slug'] video_json = self._download_json(self._INFO_URL + info_slug, info_slug, query=self._INFO_QUERY_PARAMS) if mobj['video_type'] == 'series': series_name = video_json.get('name', info_slug) season_number, episode_slug = mobj.get('season_number'), mobj.get('episode_slug') videos = [] for season in video_json['seasons']: if season_number is not None and season_number != int_or_none(season.get('number')): continue for episode in season['episodes']: if episode_slug is not None and episode_slug != episode.get('slug'): continue videos.append(self._get_video_info(episode, episode_slug, series_name)) if not videos: raise ExtractorError('Failed to find any videos to extract') if episode_slug is not None and len(videos) == 1: return videos[0] playlist_title = series_name if season_number is not None: playlist_title += ' - Season %d' % season_number return self.playlist_result(videos, playlist_id=video_json.get('_id', info_slug), playlist_title=playlist_title) return self._get_video_info(video_json, info_slug) 
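# A minimal sketch of the ad-free manifest derivation that _to_ad_free_formats
# performs above: Pluto's stitched playlists interleave ad segments, but the
# URL of a real media segment reveals the directory that also hosts an ad-free
# master.m3u8. The segment URL below is hypothetical and only illustrates the
# rewrite; it assumes the '0-end/' path layout matched by the regexes above.
import re
import urllib.parse


def ad_free_master_url(segment_url):
    # Match '<base>/0-(end|N)/<segment>.ts' and rebuild the master playlist
    # URL relative to the base directory, as the extractor does with urljoin.
    mobj = re.match(r'^(https?://.*/)0-(?:end|\d+)/[^/]+\.ts$', segment_url)
    if not mobj:
        return None
    return urllib.parse.urljoin(mobj.group(1), '0-end/master.m3u8')


# Hypothetical example:
# ad_free_master_url('https://example.com/clip/720p/0-end/00001.ts')
# -> 'https://example.com/clip/720p/0-end/master.m3u8'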
�����������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/podbayfm.py������������������������������������������������������0000664�0000000�0000000�00000005771�14675634471�0021046�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( OnDemandPagedList, clean_html, int_or_none, jwt_decode_hs256, url_or_none, ) from ..utils.traversal import traverse_obj def result_from_props(props): return { **traverse_obj(props, { 'id': ('_id', {str}), 'title': ('title', {str}), 'url': ('mediaURL', {url_or_none}), 'description': ('description', {clean_html}), 'thumbnail': ('image', {jwt_decode_hs256}, 'url', {url_or_none}), 'timestamp': ('timestamp', {int_or_none}), 'duration': ('duration', {int_or_none}), }), 'ext': 'mp3', 'vcodec': 'none', } class PodbayFMIE(InfoExtractor): _VALID_URL = r'https?://podbay\.fm/p/[^/?#]+/e/(?P<id>\d+)' _TESTS = [{ 'url': 'https://podbay.fm/p/behind-the-bastards/e/1647338400', 'md5': '895ac8505de349515f5ee8a4a3195c93', 'info_dict': { 'id': '62306451f4a48e58d0c4d6a8', 'title': 'Part One: Kissinger', 'ext': 'mp3', 'description': r're:^We begin our epic six part series on Henry Kissinger.+', 'thumbnail': r're:^https?://.*\.jpg', 'timestamp': 1647338400, 'duration': 5001, 'upload_date': '20220315', }, }] def _real_extract(self, url): episode_id = self._match_id(url) webpage = self._download_webpage(url, episode_id) data = self._search_nextjs_data(webpage, episode_id) return result_from_props(data['props']['pageProps']['episode']) class PodbayFMChannelIE(InfoExtractor): _VALID_URL = r'https?://podbay\.fm/p/(?P<id>[^/?#]+)/?(?:$|[?#])' _TESTS = [{ 'url': 'https://podbay.fm/p/behind-the-bastards', 'info_dict': { 'id': 'behind-the-bastards', 'title': 'Behind the Bastards', }, 'playlist_mincount': 21, }] _PAGE_SIZE = 10 def _fetch_page(self, channel_id, pagenum): return self._download_json( f'https://podbay.fm/api/podcast?reverse=true&page={pagenum}&slug={channel_id}', f'Downloading channel JSON page {pagenum + 1}', channel_id)['podcast'] @staticmethod def _results_from_page(channel_id, page): return [{ **result_from_props(e), 'extractor': PodbayFMIE.IE_NAME, 'extractor_key': PodbayFMIE.ie_key(), # somehow they use timestamps as the episode identifier 'webpage_url': f'https://podbay.fm/p/{channel_id}/e/{e["timestamp"]}', } for e in page['episodes']] def _real_extract(self, url): channel_id = self._match_id(url) first_page = self._fetch_page(channel_id, 0) entries = OnDemandPagedList( lambda pagenum: self._results_from_page( channel_id, self._fetch_page(channel_id, pagenum) if pagenum else first_page), self._PAGE_SIZE) return self.playlist_result(entries, channel_id, first_page.get('title')) 
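# A hedged sketch of the lazy pagination pattern that OnDemandPagedList
# enables in PodbayFMChannelIE above: pages of episodes are fetched only when
# iteration actually reaches them, so listing a large channel does not hit
# every API page up front. fetch_page below is a stand-in for
# _fetch_page(channel_id, n); the page data is fabricated for illustration.
def paged_entries(fetch_page, page_size):
    page_num = 0
    while True:
        page = fetch_page(page_num)
        yield from page
        if len(page) < page_size:  # a short page means we reached the end
            return
        page_num += 1


def fetch_page(page_num):  # stand-in; the real code calls the podbay.fm API
    fake_pages = [['ep1', 'ep2'], ['ep3']]
    return fake_pages[page_num] if page_num < len(fake_pages) else []


# list(paged_entries(fetch_page, 2)) -> ['ep1', 'ep2', 'ep3']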
�������yt-dlp-2024.09.27/yt_dlp/extractor/podchaser.py�����������������������������������������������������0000664�0000000�0000000�00000007270�14675634471�0021211�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import functools import json from .common import InfoExtractor from ..utils import ( OnDemandPagedList, float_or_none, str_or_none, str_to_int, traverse_obj, unified_timestamp, ) class PodchaserIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?podchaser\.com/podcasts/[\w-]+-(?P<podcast_id>\d+)(?:/episodes/[\w-]+-(?P<id>\d+))?' _PAGE_SIZE = 100 _TESTS = [{ 'url': 'https://www.podchaser.com/podcasts/cum-town-36924/episodes/ep-285-freeze-me-off-104365585', 'info_dict': { 'id': '104365585', 'title': 'Ep. 285 – freeze me off', 'description': 'cam ahn', 'thumbnail': r're:^https?://.*\.jpg$', 'ext': 'mp3', 'categories': ['Comedy'], 'tags': ['comedy', 'dark humor'], 'series': 'Cum Town', 'duration': 3708, 'timestamp': 1636531259, 'upload_date': '20211110', 'average_rating': 4.0, }, }, { 'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853', 'info_dict': { 'id': '28853', 'title': 'The Bone Zone', 'description': 'Podcast by The Bone Zone', }, 'playlist_count': 275, }, { 'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes', 'info_dict': { 'id': '699349', 'title': 'Sean Carroll\'s Mindscape: Science, Society, Philosophy, Culture, Arts, and Ideas', 'description': 'md5:2cbd8f4749891a84dc8235342e0b5ff1', }, 'playlist_mincount': 225, }] @staticmethod def _parse_episode(episode, podcast): return { 'id': str(episode.get('id')), 'title': episode.get('title'), 'description': episode.get('description'), 'url': episode.get('audio_url'), 'thumbnail': episode.get('image_url'), 'duration': str_to_int(episode.get('length')), 'timestamp': unified_timestamp(episode.get('air_date')), 'average_rating': float_or_none(episode.get('rating')), 'categories': list(set(traverse_obj(podcast, (('summary', None), 'categories', ..., 'text')))), 'tags': traverse_obj(podcast, ('tags', ..., 'text')), 'series': podcast.get('title'), } def _call_api(self, path, *args, **kwargs): return self._download_json(f'https://api.podchaser.com/{path}', *args, **kwargs) def _fetch_page(self, podcast_id, podcast, page): json_response = self._call_api( 'list/episode', podcast_id, headers={'Content-Type': 'application/json;charset=utf-8'}, data=json.dumps({ 'start': page * self._PAGE_SIZE, 'count': self._PAGE_SIZE, 'sort_order': 'SORT_ORDER_RECENT', 'filters': { 'podcast_id': podcast_id, }, 'options': {}, }).encode()) for episode in json_response['entities']: yield self._parse_episode(episode, podcast) def _real_extract(self, url): podcast_id, episode_id = self._match_valid_url(url).group('podcast_id', 'id') podcast = self._call_api(f'podcasts/{podcast_id}', episode_id or podcast_id) if not episode_id: return self.playlist_result( OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE), str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description')) episode = self._call_api(f'episodes/{episode_id}', episode_id) return self._parse_episode(episode, podcast) 
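# _parse_episode above leans on traverse_obj to pull fields out of nested,
# possibly-missing JSON without raising. Below is a toy version of that safe
# lookup (the real yt_dlp.utils.traverse_obj is far more general); the sample
# data merely mimics the podcast JSON shape and is illustrative only.
def dig(obj, *path, default=None):
    for key in path:
        if isinstance(obj, dict):
            obj = obj.get(key)
        elif isinstance(obj, list) and isinstance(key, int) and -len(obj) <= key < len(obj):
            obj = obj[key]
        else:
            return default
        if obj is None:
            return default
    return obj


podcast = {'summary': {'categories': [{'text': 'Comedy'}]}, 'tags': None}
# dig(podcast, 'summary', 'categories', 0, 'text') -> 'Comedy'
# dig(podcast, 'tags', 0, 'text') -> None (missing branch, no exception)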
����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/podomatic.py�����������������������������������������������������0000664�0000000�0000000�00000005025�14675634471�0021214�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import json from .common import InfoExtractor from ..utils import int_or_none class PodomaticIE(InfoExtractor): _WORKING = False IE_NAME = 'podomatic' _VALID_URL = r'''(?x) (?P<proto>https?):// (?: (?P<channel>[^.]+)\.podomatic\.com/entry| (?:www\.)?podomatic\.com/podcasts/(?P<channel_2>[^/]+)/episodes )/ (?P<id>[^/?#&]+) ''' _TESTS = [{ 'url': 'http://scienceteachingtips.podomatic.com/entry/2009-01-02T16_03_35-08_00', 'md5': '84bb855fcf3429e6bf72460e1eed782d', 'info_dict': { 'id': '2009-01-02T16_03_35-08_00', 'ext': 'mp3', 'uploader': 'Science Teaching Tips', 'uploader_id': 'scienceteachingtips', 'title': '64. When the Moon Hits Your Eye', 'duration': 446, }, }, { 'url': 'http://ostbahnhof.podomatic.com/entry/2013-11-15T16_31_21-08_00', 'md5': 'd2cf443931b6148e27638650e2638297', 'info_dict': { 'id': '2013-11-15T16_31_21-08_00', 'ext': 'mp3', 'uploader': 'Ostbahnhof / Techno Mix', 'uploader_id': 'ostbahnhof', 'title': 'Einunddreizig', 'duration': 3799, }, }, { 'url': 'https://www.podomatic.com/podcasts/scienceteachingtips/episodes/2009-01-02T16_03_35-08_00', 'only_matching': True, }] def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') channel = mobj.group('channel') or mobj.group('channel_2') json_url = ('{}://{}.podomatic.com/entry/embed_params/{}?permalink=true&rtmp=0'.format( mobj.group('proto'), channel, video_id)) data_json = self._download_webpage( json_url, video_id, 'Downloading video info') data = json.loads(data_json) video_url = data['downloadLink'] if not video_url: video_url = '{}/{}'.format(data['streamer'].replace('rtmp', 'http'), data['mediaLocation']) uploader = data['podcast'] title = data['title'] thumbnail = data['imageLocation'] duration = int_or_none(data.get('length'), 1000) return { 'id': video_id, 'url': video_url, 'title': title, 'uploader': uploader, 'uploader_id': channel, 'thumbnail': thumbnail, 'duration': duration, } 
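# The duration above is parsed with int_or_none(data.get('length'), 1000);
# given the scale of 1000, the 'length' field is presumably reported in
# milliseconds and converted to seconds while tolerating missing or malformed
# values. A simplified sketch of that helper follows (the real one lives in
# yt_dlp.utils and supports more options such as invscale and default):
def int_or_none_sketch(value, scale=1):
    if value in (None, ''):
        return None
    try:
        return int(value) // scale
    except (TypeError, ValueError):
        return None


# int_or_none_sketch('446000', 1000) -> 446
# int_or_none_sketch(None, 1000)     -> None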
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/pokemon.py�������������������������������������������������������0000664�0000000�0000000�00000012544�14675634471�0020711�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( ExtractorError, extract_attributes, int_or_none, js_to_json, merge_dicts, ) class PokemonIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?pokemon\.com/[a-z]{2}(?:.*?play=(?P<id>[a-z0-9]{32})|/(?:[^/]+/)+(?P<display_id>[^/?#&]+))' _TESTS = [{ 'url': 'https://www.pokemon.com/us/pokemon-episodes/20_30-the-ol-raise-and-switch/', 'md5': '2fe8eaec69768b25ef898cda9c43062e', 'info_dict': { 'id': 'afe22e30f01c41f49d4f1d9eab5cd9a4', 'ext': 'mp4', 'title': 'The Ol’ Raise and Switch!', 'description': 'md5:7db77f7107f98ba88401d3adc80ff7af', }, 'add_id': ['LimelightMedia'], }, { # no data-video-title 'url': 'https://www.pokemon.com/fr/episodes-pokemon/films-pokemon/pokemon-lascension-de-darkrai-2008', 'info_dict': { 'id': 'dfbaf830d7e54e179837c50c0c6cc0e1', 'ext': 'mp4', 'title': "Pokémon : L'ascension de Darkrai", 'description': 'md5:d1dbc9e206070c3e14a06ff557659fb5', }, 'add_id': ['LimelightMedia'], 'params': { 'skip_download': True, }, }, { 'url': 'http://www.pokemon.com/uk/pokemon-episodes/?play=2e8b5c761f1d4a9286165d7748c1ece2', 'only_matching': True, }, { 'url': 'http://www.pokemon.com/fr/episodes-pokemon/18_09-un-hiver-inattendu/', 'only_matching': True, }, { 'url': 'http://www.pokemon.com/de/pokemon-folgen/01_20-bye-bye-smettbo/', 'only_matching': True, }] def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).groups() webpage = self._download_webpage(url, video_id or display_id) video_data = extract_attributes(self._search_regex( r'(<[^>]+data-video-id="{}"[^>]*>)'.format(video_id if video_id else '[a-z0-9]{32}'), webpage, 'video data element')) video_id = video_data['data-video-id'] title = video_data.get('data-video-title') or self._html_search_meta( 'pkm-title', webpage, ' title', default=None) or self._search_regex( r'<h1[^>]+\bclass=["\']us-title[^>]+>([^<]+)', webpage, 'title') return { '_type': 'url_transparent', 'id': video_id, 'url': f'limelight:media:{video_id}', 'title': title, 'description': video_data.get('data-video-summary'), 'thumbnail': video_data.get('data-video-poster'), 'series': 'Pokémon', 'season_number': int_or_none(video_data.get('data-video-season')), 'episode': title, 'episode_number': int_or_none(video_data.get('data-video-episode')), 'ie_key': 'LimelightMedia', } class PokemonWatchIE(InfoExtractor): _VALID_URL = r'https?://watch\.pokemon\.com/[a-z]{2}-[a-z]{2}/(?:#/)?player(?:\.html)?\?id=(?P<id>[a-z0-9]{32})' _API_URL = 'https://www.pokemon.com/api/pokemontv/v2/channels/{0:}' _TESTS = [{ 'url': 
'https://watch.pokemon.com/en-us/player.html?id=8309a40969894a8e8d5bc1311e9c5667', 'md5': '62833938a31e61ab49ada92f524c42ff', 'info_dict': { 'id': '8309a40969894a8e8d5bc1311e9c5667', 'ext': 'mp4', 'title': 'Lillier and the Staff!', 'description': 'md5:338841b8c21b283d24bdc9b568849f04', }, }, { 'url': 'https://watch.pokemon.com/en-us/#/player?id=3fe7752ba09141f0b0f7756d1981c6b2', 'only_matching': True, }, { 'url': 'https://watch.pokemon.com/de-de/player.html?id=b3c402e111a4459eb47e12160ab0ba07', 'only_matching': True, }] def _extract_media(self, channel_array, video_id): for channel in channel_array: for media in channel.get('media'): if media.get('id') == video_id: return media return None def _real_extract(self, url): video_id = self._match_id(url) info = { '_type': 'url', 'id': video_id, 'url': f'limelight:media:{video_id}', 'ie_key': 'LimelightMedia', } # API call can be avoided entirely if we are listing formats if self.get_param('listformats', False): return info webpage = self._download_webpage(url, video_id) build_vars = self._parse_json(self._search_regex( r'(?s)buildVars\s*=\s*({.*?})', webpage, 'build vars'), video_id, transform_source=js_to_json) region = build_vars.get('region') channel_array = self._download_json(self._API_URL.format(region), video_id) video_data = self._extract_media(channel_array, video_id) if video_data is None: raise ExtractorError( f'Video {video_id} does not exist', expected=True) info['_type'] = 'url_transparent' images = video_data.get('images') return merge_dicts(info, { 'title': video_data.get('title'), 'description': video_data.get('description'), 'thumbnail': images.get('medium') or images.get('small'), 'series': 'Pokémon', 'season_number': int_or_none(video_data.get('season')), 'episode': video_data.get('title'), 'episode_number': int_or_none(video_data.get('episode')), }) ������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/pokergo.py�������������������������������������������������������0000664�0000000�0000000�00000010263�14675634471�0020703�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import base64 from .common import InfoExtractor from ..utils import ( ExtractorError, try_get, ) from ..utils.traversal import traverse_obj class PokerGoBaseIE(InfoExtractor): _NETRC_MACHINE = 'pokergo' _AUTH_TOKEN = None _PROPERTY_ID = '1dfb3940-7d53-4980-b0b0-f28b369a000d' def _perform_login(self, username, password): if self._AUTH_TOKEN: return self.report_login() PokerGoBaseIE._AUTH_TOKEN = self._download_json( f'https://subscription.pokergo.com/properties/{self._PROPERTY_ID}/sign-in', None, headers={'authorization': f'Basic {base64.b64encode(f"{username}:{password}".encode()).decode()}'}, data=b'')['meta']['token'] if not self._AUTH_TOKEN: raise ExtractorError('Unable to get Auth Token.', expected=True) def _real_initialize(self): if not self._AUTH_TOKEN: self.raise_login_required(method='password') class PokerGoIE(PokerGoBaseIE): _VALID_URL = r'https?://(?:www\.)?pokergo\.com/videos/(?P<id>[^&$#/?]+)' _TESTS = [{ 'url': 'https://www.pokergo.com/videos/2a70ec4e-4a80-414b-97ec-725d9b72a7dc', 'info_dict': { 'id': 
'aVLOxDzY', 'ext': 'mp4', 'title': 'Poker After Dark | Season 12 (2020) | Cry Me a River | Episode 2', 'description': 'md5:c7a8c29556cbfb6eb3c0d5d622251b71', 'thumbnail': 'https://cdn.jwplayer.com/v2/media/aVLOxDzY/poster.jpg?width=720', 'timestamp': 1608085715, 'duration': 2700.12, 'season_number': 12, 'episode_number': 2, 'series': 'poker after dark', 'upload_date': '20201216', 'season': 'Season 12', 'episode': 'Episode 2', 'display_id': '2a70ec4e-4a80-414b-97ec-725d9b72a7dc', }, 'params': {'skip_download': True}, }] def _real_extract(self, url): video_id = self._match_id(url) data_json = self._download_json( f'https://api.pokergo.com/v2/properties/{self._PROPERTY_ID}/videos/{video_id}', video_id, headers={'authorization': f'Bearer {self._AUTH_TOKEN}'})['data'] v_id = data_json['source'] thumbnails = [{ 'url': image['url'], 'id': image.get('label'), 'width': image.get('width'), 'height': image.get('height'), } for image in data_json.get('images') or [] if image.get('url')] series_json = traverse_obj(data_json, ('show_tags', lambda _, v: v['video_id'] == video_id, any)) or {} return { '_type': 'url_transparent', 'display_id': video_id, 'title': data_json.get('title'), 'description': data_json.get('description'), 'duration': data_json.get('duration'), 'thumbnails': thumbnails, 'season_number': series_json.get('season'), 'episode_number': series_json.get('episode_number'), 'series': try_get(series_json, lambda x: x['tag']['name']), 'url': f'https://cdn.jwplayer.com/v2/media/{v_id}', } class PokerGoCollectionIE(PokerGoBaseIE): _VALID_URL = r'https?://(?:www\.)?pokergo\.com/collections/(?P<id>[^&$#/?]+)' _TESTS = [{ 'url': 'https://www.pokergo.com/collections/19ffe481-5dae-481a-8869-75cc0e3c4700', 'playlist_mincount': 13, 'info_dict': { 'id': '19ffe481-5dae-481a-8869-75cc0e3c4700', }, }] def _entries(self, playlist_id): data_json = self._download_json( f'https://api.pokergo.com/v2/properties/{self._PROPERTY_ID}/collections/{playlist_id}?include=entities', playlist_id, headers={'authorization': f'Bearer {self._AUTH_TOKEN}'})['data'] for video in data_json.get('collection_video') or []: video_id = video.get('id') if video_id: yield self.url_result( f'https://www.pokergo.com/videos/{video_id}', ie=PokerGoIE.ie_key(), video_id=video_id) def _real_extract(self, url): playlist_id = self._match_id(url) return self.playlist_result(self._entries(playlist_id), playlist_id=playlist_id) ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/polsatgo.py������������������������������������������������������0000664�0000000�0000000�00000006111�14675634471�0021062�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import json import uuid from .common import InfoExtractor from ..utils import ( ExtractorError, int_or_none, try_get, url_or_none, ) class PolsatGoIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?polsat(?:box)?go\.pl/.+/(?P<id>[0-9a-fA-F]+)(?:[/#?]|$)' _TESTS = [{ 'url': 
'https://polsatgo.pl/wideo/seriale/swiat-wedlug-kiepskich/5024045/sezon-1/5028300/swiat-wedlug-kiepskich-odcinek-88/4121', 'info_dict': { 'id': '4121', 'ext': 'mp4', 'title': 'Świat według Kiepskich - Odcinek 88', 'age_limit': 12, }, }] def _extract_formats(self, sources, video_id): for source in sources or []: if not source.get('id'): continue url = url_or_none(self._call_api( 'drm', video_id, 'getPseudoLicense', {'mediaId': video_id, 'sourceId': source['id']}).get('url')) if not url: continue yield { 'url': url, 'height': int_or_none(try_get(source, lambda x: x['quality'][:-1])), } def _real_extract(self, url): video_id = self._match_id(url) media = self._call_api('navigation', video_id, 'prePlayData', {'mediaId': video_id})['mediaItem'] formats = list(self._extract_formats( try_get(media, lambda x: x['playback']['mediaSources']), video_id)) return { 'id': video_id, 'title': media['displayInfo']['title'], 'formats': formats, 'age_limit': int_or_none(media['displayInfo']['ageGroup']), } def _call_api(self, endpoint, media_id, method, params): rand_uuid = str(uuid.uuid4()) res = self._download_json( f'https://b2c-mobile.redefine.pl/rpc/{endpoint}/', media_id, note=f'Downloading {method} JSON metadata', data=json.dumps({ 'method': method, 'id': '2137', 'jsonrpc': '2.0', 'params': { **params, 'userAgentData': { 'deviceType': 'mobile', 'application': 'native', 'os': 'android', 'build': 10003, 'widevine': False, 'portal': 'pg', 'player': 'cpplayer', }, 'deviceId': { 'type': 'other', 'value': rand_uuid, }, 'clientId': rand_uuid, 'cpid': 1, }, }).encode(), headers={'Content-type': 'application/json'}) if not res.get('result'): if res['error']['code'] == 13404: raise ExtractorError('This video is either unavailable in your region or is DRM protected', expected=True) raise ExtractorError(f'Solorz said: {res["error"]["message"]} - {res["error"]["data"]["userMessage"]}') return res['result'] �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/polskieradio.py��������������������������������������������������0000664�0000000�0000000�00000057624�14675634471�0021736�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import itertools import json import math import re import urllib.parse from .common import InfoExtractor from ..utils import ( ExtractorError, InAdvancePagedList, determine_ext, extract_attributes, int_or_none, js_to_json, parse_iso8601, strip_or_none, traverse_obj, unescapeHTML, unified_timestamp, url_or_none, urljoin, ) class PolskieRadioBaseExtractor(InfoExtractor): def _extract_webpage_player_entries(self, webpage, playlist_id, base_data): media_urls = set() for data_media in re.findall(r'<[^>]+data-media="?({[^>]+})"?', webpage): media = self._parse_json(data_media, playlist_id, transform_source=unescapeHTML, fatal=False) if not media.get('file') or not media.get('desc'): 
continue media_url = self._proto_relative_url(media['file']) if media_url in media_urls: continue media_urls.add(media_url) entry = base_data.copy() entry.update({ 'id': str(media['id']), 'url': media_url, 'duration': int_or_none(media.get('length')), 'vcodec': 'none' if media.get('provider') == 'audio' else None, }) entry_title = urllib.parse.unquote(media['desc']) if entry_title: entry['title'] = entry_title yield entry class PolskieRadioLegacyIE(PolskieRadioBaseExtractor): # legacy sites IE_NAME = 'polskieradio:legacy' _VALID_URL = r'https?://(?:www\.)?polskieradio(?:24)?\.pl/\d+/\d+/[Aa]rtykul/(?P<id>\d+)' _TESTS = [{ 'url': 'https://www.polskieradio.pl/8/2382/Artykul/2534482,Zagarysci-Poezja-jak-spoiwo', 'info_dict': { 'id': '2534482', 'title': 'Żagaryści. Poezja jak spoiwo', 'description': 'md5:f18d95d5dcba747a09b635e21a4c0695', }, 'playlist': [{ 'md5': 'd07559829f61d5a93a75755987ded760', 'info_dict': { 'id': '2516679', 'ext': 'mp3', 'title': 'md5:c6e1234e0b747ad883cb91b7ad06b98c', 'timestamp': 1592654400, 'upload_date': '20200620', 'duration': 1430, 'thumbnail': r're:^https?://static\.prsa\.pl/images/.*\.jpg$', }, }], }, { # PR4 audition - other frontend 'url': 'https://www.polskieradio.pl/10/6071/Artykul/2610977,Poglos-29-pazdziernika-godz-2301', 'info_dict': { 'id': '2610977', 'ext': 'mp3', 'title': 'Pogłos 29 października godz. 23:01', }, }, { 'url': 'https://polskieradio24.pl/130/4503/Artykul/2621876,Narusza-nasza-suwerennosc-Publicysci-o-uzaleznieniu-funduszy-UE-od-praworzadnosci', 'only_matching': True, }] def _real_extract(self, url): playlist_id = self._match_id(url) webpage, urlh = self._download_webpage_handle(url, playlist_id) if PolskieRadioIE.suitable(urlh.url): return self.url_result(urlh.url, PolskieRadioIE, playlist_id) content = self._search_regex( r'(?s)<div[^>]+class="\s*this-article\s*"[^>]*>(.+?)<div[^>]+class="tags"[^>]*>', webpage, 'content', default=None) timestamp = unified_timestamp(self._html_search_regex( r'(?s)<span[^>]+id="datetime2"[^>]*>(.+?)</span>', webpage, 'timestamp', default=None)) thumbnail_url = self._og_search_thumbnail(webpage, default=None) title = self._og_search_title(webpage).strip() description = strip_or_none(self._og_search_description(webpage, default=None)) description = description.replace('\xa0', ' ') if description is not None else None if not content: return { 'id': playlist_id, 'url': self._proto_relative_url( self._search_regex( r"source:\s*'(//static\.prsa\.pl/[^']+)'", webpage, 'audition record url')), 'title': title, 'description': description, 'timestamp': timestamp, 'thumbnail': thumbnail_url, } entries = self._extract_webpage_player_entries(content, playlist_id, { 'title': title, 'timestamp': timestamp, 'thumbnail': thumbnail_url, }) return self.playlist_result(entries, playlist_id, title, description) class PolskieRadioIE(PolskieRadioBaseExtractor): # new next.js sites _VALID_URL = r'https?://(?:[^/]+\.)?(?:polskieradio(?:24)?|radiokierowcow)\.pl/artykul/(?P<id>\d+)' _TESTS = [{ # articleData, attachments 'url': 'https://jedynka.polskieradio.pl/artykul/1587943', 'info_dict': { 'id': '1587943', 'title': 'Prof. 
Andrzej Nowak: o historii nie da się myśleć beznamiętnie', 'description': 'md5:12f954edbf3120c5e7075e17bf9fc5c5', }, 'playlist': [{ 'md5': '2984ee6ce9046d91fc233bc1a864a09a', 'info_dict': { 'id': '7a85d429-5356-4def-a347-925e4ae7406b', 'ext': 'mp3', 'title': 'md5:d4623290d4ac983bf924061c75c23a0d', }, }], }, { # post, legacy html players 'url': 'https://trojka.polskieradio.pl/artykul/2589163,Czy-wciaz-otrzymujemy-zdjecia-z-sond-Voyager', 'info_dict': { 'id': '2589163', 'title': 'Czy wciąż otrzymujemy zdjęcia z sond Voyager?', 'description': 'md5:cf1a7f348d63a2db9c0d7a63d1669473', }, 'playlist': [{ 'info_dict': { 'id': '2577880', 'ext': 'mp3', 'title': 'md5:a57d10a0c02abd34dd675cb33707ad5a', 'duration': 321, }, }], }, { # data, legacy 'url': 'https://radiokierowcow.pl/artykul/2694529', 'info_dict': { 'id': '2694529', 'title': 'Zielona fala reliktem przeszłości?', 'description': 'md5:f20a9a7ed9cb58916c54add94eae3bc0', }, 'playlist_count': 3, }, { 'url': 'https://trojka.polskieradio.pl/artykul/1632955', 'only_matching': True, }, { # with mp4 video 'url': 'https://trojka.polskieradio.pl/artykul/1634903', 'only_matching': True, }, { 'url': 'https://jedynka.polskieradio.pl/artykul/3042436,Polityka-wschodnia-ojca-i-syna-Wladyslawa-Lokietka-i-Kazimierza-Wielkiego', 'only_matching': True, }] def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) article_data = traverse_obj( self._search_nextjs_data(webpage, playlist_id), ( 'props', 'pageProps', (('data', 'articleData'), 'post', 'data')), get_all=False) title = strip_or_none(article_data['title']) description = strip_or_none(article_data.get('lead')) entries = [{ 'url': entry['file'], 'ext': determine_ext(entry.get('fileName')), 'id': self._search_regex( r'([a-f\d]{8}-(?:[a-f\d]{4}-){3}[a-f\d]{12})', entry['file'], 'entry id'), 'title': strip_or_none(entry.get('description')) or title, } for entry in article_data.get('attachments') or () if entry.get('fileType') in ('Audio', )] if not entries: # some legacy articles have no json attachments, but players in body entries = self._extract_webpage_player_entries(article_data['content'], playlist_id, { 'title': title, }) return self.playlist_result(entries, playlist_id, title, description) class PolskieRadioAuditionIE(InfoExtractor): # new next.js sites IE_NAME = 'polskieradio:audition' _VALID_URL = r'https?://(?:[^/]+\.)?polskieradio\.pl/audycj[ae]/(?P<id>\d+)' _TESTS = [{ # articles, PR1 'url': 'https://jedynka.polskieradio.pl/audycje/5102', 'info_dict': { 'id': '5102', 'title': 'Historia żywa', 'thumbnail': r're:https://static\.prsa\.pl/images/.+', }, 'playlist_mincount': 38, }, { # episodes, PR1 'url': 'https://jedynka.polskieradio.pl/audycje/5769', 'info_dict': { 'id': '5769', 'title': 'AgroFakty', 'thumbnail': r're:https://static\.prsa\.pl/images/.+', }, 'playlist_mincount': 269, }, { # both episodes and articles, PR3 'url': 'https://trojka.polskieradio.pl/audycja/8906', 'info_dict': { 'id': '8906', 'title': 'Trójka budzi', 'thumbnail': r're:https://static\.prsa\.pl/images/.+', }, 'playlist_mincount': 722, }, { # some articles were "promoted to main page" and thus link to old frontend 'url': 'https://trojka.polskieradio.pl/audycja/305', 'info_dict': { 'id': '305', 'title': 'Co w mowie piszczy?', 'thumbnail': r're:https://static\.prsa\.pl/images/.+', }, 'playlist_count': 1523, }] def _call_lp3(self, path, query, video_id, note): return self._download_json( f'https://lp3test.polskieradio.pl/{path}', video_id, note, query=query, 
headers={'x-api-key': '9bf6c5a2-a7d0-4980-9ed7-a3f7291f2a81'}) def _entries(self, playlist_id, has_episodes, has_articles): for i in itertools.count(0) if has_episodes else []: page = self._call_lp3( 'AudioArticle/GetListByCategoryId', { 'categoryId': playlist_id, 'PageSize': 10, 'skip': i, 'format': 400, }, playlist_id, f'Downloading episode list page {i + 1}') if not traverse_obj(page, 'data'): break for episode in page['data']: yield { 'id': str(episode['id']), 'url': episode['file'], 'title': episode.get('title'), 'duration': int_or_none(episode.get('duration')), 'timestamp': parse_iso8601(episode.get('datePublic')), } for i in itertools.count(0) if has_articles else []: page = self._call_lp3( 'Article/GetListByCategoryId', { 'categoryId': playlist_id, 'PageSize': 9, 'skip': i, 'format': 400, }, playlist_id, f'Downloading article list page {i + 1}') if not traverse_obj(page, 'data'): break for article in page['data']: yield { '_type': 'url_transparent', 'id': str(article['id']), 'url': article['url'], 'title': article.get('shortTitle'), 'description': traverse_obj(article, ('description', 'lead')), 'timestamp': parse_iso8601(article.get('datePublic')), } def _real_extract(self, url): playlist_id = self._match_id(url) page_props = traverse_obj( self._search_nextjs_data(self._download_webpage(url, playlist_id), playlist_id), ('props', 'pageProps', ('data', None)), get_all=False) has_episodes = bool(traverse_obj(page_props, 'episodes', 'audios')) has_articles = bool(traverse_obj(page_props, 'articles')) return self.playlist_result( self._entries(playlist_id, has_episodes, has_articles), playlist_id, title=traverse_obj(page_props, ('details', 'name')), description=traverse_obj(page_props, ('details', 'description', 'lead')), thumbnail=traverse_obj(page_props, ('details', 'photo'))) class PolskieRadioCategoryIE(InfoExtractor): # legacy sites IE_NAME = 'polskieradio:category' _VALID_URL = r'https?://(?:www\.)?polskieradio\.pl/(?:\d+(?:,[^/]+)?/|[^/]+/Tag)(?P<id>\d+)' _TESTS = [{ 'url': 'http://www.polskieradio.pl/37,RedakcjaKatolicka/4143,Kierunek-Krakow', 'info_dict': { 'id': '4143', 'title': 'Kierunek Kraków', }, 'playlist_mincount': 61, }, { 'url': 'http://www.polskieradio.pl/10,czworka/214,muzyka', 'info_dict': { 'id': '214', 'title': 'Muzyka', }, 'playlist_mincount': 61, }, { # billennium tabs 'url': 'https://www.polskieradio.pl/8/2385', 'info_dict': { 'id': '2385', 'title': 'Droga przez mąkę', }, 'playlist_mincount': 111, }, { 'url': 'https://www.polskieradio.pl/10/4930', 'info_dict': { 'id': '4930', 'title': 'Teraz K-pop!', }, 'playlist_mincount': 392, }, { # post back pages, audio content directly without articles 'url': 'https://www.polskieradio.pl/8,dwojka/7376,nowa-mowa', 'info_dict': { 'id': '7376', 'title': 'Nowa mowa', }, 'playlist_mincount': 244, }, { 'url': 'https://www.polskieradio.pl/Krzysztof-Dziuba/Tag175458', 'info_dict': { 'id': '175458', 'title': 'Krzysztof Dziuba', }, 'playlist_mincount': 420, }, { 'url': 'http://www.polskieradio.pl/8,Dwojka/196,Publicystyka', 'only_matching': True, }] @classmethod def suitable(cls, url): return False if PolskieRadioLegacyIE.suitable(url) else super().suitable(url) def _entries(self, url, page, category_id): content = page is_billennium_tabs = 'onclick="TB_LoadTab(' in page is_post_back = 'onclick="__doPostBack(' in page pagination = page if is_billennium_tabs else None for page_num in itertools.count(2): for a_entry, entry_id in re.findall( 
r'(?s)<article[^>]+>.*?(<a[^>]+href=["\'](?:(?:https?)?://[^/]+)?/\d+/\d+/Artykul/(\d+)[^>]+>).*?</article>', content): entry = extract_attributes(a_entry) if entry.get('href'): yield self.url_result( urljoin(url, entry['href']), PolskieRadioLegacyIE, entry_id, entry.get('title')) for a_entry in re.findall(r'<span data-media=({[^ ]+})', content): yield traverse_obj(self._parse_json(a_entry, category_id), { 'url': 'file', 'id': 'uid', 'duration': 'length', 'title': ('title', {urllib.parse.unquote}), 'description': ('desc', {urllib.parse.unquote}), }) if is_billennium_tabs: params = self._search_json( r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+onclick=["\']TB_LoadTab\(', pagination, 'next page params', category_id, default=None, close_objects=1, contains_pattern='.+', transform_source=lambda x: f'[{js_to_json(unescapeHTML(x))}') if not params: break tab_content = self._download_json( 'https://www.polskieradio.pl/CMS/TemplateBoxesManagement/TemplateBoxTabContent.aspx/GetTabContent', category_id, f'Downloading page {page_num}', headers={'content-type': 'application/json'}, data=json.dumps(dict(zip(( 'boxInstanceId', 'tabId', 'categoryType', 'sectionId', 'categoryId', 'pagerMode', 'subjectIds', 'tagIndexId', 'queryString', 'name', 'openArticlesInParentTemplate', 'idSectionFromUrl', 'maxDocumentAge', 'showCategoryForArticle', 'pageNumber', ), params))).encode())['d'] content, pagination = tab_content['Content'], tab_content.get('PagerContent') elif is_post_back: target = self._search_regex( r'onclick=(?:["\'])__doPostBack\((?P<q1>["\'])(?P<target>[\w$]+)(?P=q1)\s*,\s*(?P<q2>["\'])Next(?P=q2)', content, 'pagination postback target', group='target', default=None) if not target: break content = self._download_webpage( url, category_id, f'Downloading page {page_num}', data=urllib.parse.urlencode({ **self._hidden_inputs(content), '__EVENTTARGET': target, '__EVENTARGUMENT': 'Next', }).encode()) else: next_url = urljoin(url, self._search_regex( r'<div[^>]+class=["\']next["\'][^>]*>\s*<a[^>]+href=(["\'])(?P<url>(?:(?!\1).)+)\1', content, 'next page url', group='url', default=None)) if not next_url: break content = self._download_webpage(next_url, category_id, f'Downloading page {page_num}') def _real_extract(self, url): category_id = self._match_id(url) webpage, urlh = self._download_webpage_handle(url, category_id) if PolskieRadioAuditionIE.suitable(urlh.url): return self.url_result(urlh.url, PolskieRadioAuditionIE, category_id) title = self._html_search_regex( r'<title>([^<]+)(?: - [^<]+ - [^<]+| w [Pp]olskie[Rr]adio\.pl\s*)', webpage, 'title', fatal=False) return self.playlist_result( self._entries(url, webpage, category_id), category_id, title) class PolskieRadioPlayerIE(InfoExtractor): IE_NAME = 'polskieradio:player' _VALID_URL = r'https?://player\.polskieradio\.pl/anteny/(?P[^/]+)' _BASE_URL = 'https://player.polskieradio.pl' _PLAYER_URL = 'https://player.polskieradio.pl/main.bundle.js' _STATIONS_API_URL = 'https://apipr.polskieradio.pl/api/stacje' _TESTS = [{ 'url': 'https://player.polskieradio.pl/anteny/trojka', 'info_dict': { 'id': '3', 'ext': 'm4a', 'title': 'Trójka', }, 'params': { 'format': 'bestaudio', 'skip_download': 'endless stream', }, }] def _get_channel_list(self, channel_url='no_channel'): player_code = self._download_webpage( self._PLAYER_URL, channel_url, note='Downloading js player') channel_list = js_to_json(self._search_regex( r';var r="anteny",a=(\[.+?\])},', player_code, 'channel list')) return self._parse_json(channel_list, channel_url) def _real_extract(self, url): 
channel_url = self._match_id(url) channel_list = self._get_channel_list(channel_url) channel = next((c for c in channel_list if c.get('url') == channel_url), None) if not channel: raise ExtractorError('Channel not found') station_list = self._download_json(self._STATIONS_API_URL, channel_url, note='Downloading stream url list', headers={ 'Accept': 'application/json', 'Referer': url, 'Origin': self._BASE_URL, }) station = next((s for s in station_list if s.get('Name') == (channel.get('streamName') or channel.get('name'))), None) if not station: raise ExtractorError('Station not found even though we extracted channel') formats = [] for stream_url in station['Streams']: stream_url = self._proto_relative_url(stream_url) if stream_url.endswith('/playlist.m3u8'): formats.extend(self._extract_m3u8_formats(stream_url, channel_url, live=True)) elif stream_url.endswith('/manifest.f4m'): formats.extend(self._extract_mpd_formats(stream_url, channel_url)) elif stream_url.endswith('/Manifest'): formats.extend(self._extract_ism_formats(stream_url, channel_url)) else: formats.append({ 'url': stream_url, }) return { 'id': str(channel['id']), 'formats': formats, 'title': channel.get('name') or channel.get('streamName'), 'display_id': channel_url, 'thumbnail': f'{self._BASE_URL}/images/{channel_url}-color-logo.png', 'is_live': True, } class PolskieRadioPodcastBaseExtractor(InfoExtractor): _API_BASE = 'https://apipodcasts.polskieradio.pl/api' def _parse_episode(self, data): return { 'id': data['guid'], 'formats': [{ 'url': data['url'], 'filesize': int_or_none(data.get('fileSize')), }], 'title': data['title'], 'description': data.get('description'), 'duration': int_or_none(data.get('length')), 'timestamp': parse_iso8601(data.get('publishDate')), 'thumbnail': url_or_none(data.get('image')), 'series': data.get('podcastTitle'), 'episode': data['title'], } class PolskieRadioPodcastListIE(PolskieRadioPodcastBaseExtractor): IE_NAME = 'polskieradio:podcast:list' _VALID_URL = r'https?://podcasty\.polskieradio\.pl/podcast/(?P\d+)' _TESTS = [{ 'url': 'https://podcasty.polskieradio.pl/podcast/8/', 'info_dict': { 'id': '8', 'title': 'Śniadanie w Trójce', 'description': 'md5:57abcc27bc4c6a6b25baa3061975b9ef', 'uploader': 'Beata Michniewicz', }, 'playlist_mincount': 714, }] _PAGE_SIZE = 10 def _call_api(self, podcast_id, page): return self._download_json( f'{self._API_BASE}/Podcasts/{podcast_id}/?pageSize={self._PAGE_SIZE}&page={page}', podcast_id, f'Downloading page {page}') def _real_extract(self, url): podcast_id = self._match_id(url) data = self._call_api(podcast_id, 1) def get_page(page_num): page_data = self._call_api(podcast_id, page_num + 1) if page_num else data yield from (self._parse_episode(ep) for ep in page_data['items']) return { '_type': 'playlist', 'entries': InAdvancePagedList( get_page, math.ceil(data['itemCount'] / self._PAGE_SIZE), self._PAGE_SIZE), 'id': str(data['id']), 'title': data.get('title'), 'description': data.get('description'), 'uploader': data.get('announcer'), } class PolskieRadioPodcastIE(PolskieRadioPodcastBaseExtractor): IE_NAME = 'polskieradio:podcast' _VALID_URL = r'https?://podcasty\.polskieradio\.pl/track/(?P[a-f\d]{8}(?:-[a-f\d]{4}){4}[a-f\d]{8})' _TESTS = [{ 'url': 'https://podcasty.polskieradio.pl/track/6eafe403-cb8f-4756-b896-4455c3713c32', 'info_dict': { 'id': '6eafe403-cb8f-4756-b896-4455c3713c32', 'ext': 'mp3', 'title': 'Theresa May rezygnuje. Co dalej z brexitem?', 'description': 'md5:e41c409a29d022b70ef0faa61dbded60', 'episode': 'Theresa May rezygnuje. 
            'duration': 2893,
            'thumbnail': 'https://static.prsa.pl/images/58649376-c8a0-4ba2-a714-78b383285f5f.jpg',
            'series': 'Raport o stanie świata',
        },
    }]

    def _real_extract(self, url):
        podcast_id = self._match_id(url)
        data = self._download_json(
            f'{self._API_BASE}/audio',
            podcast_id, 'Downloading podcast metadata',
            data=json.dumps({
                'guids': [podcast_id],
            }).encode(),
            headers={
                'Content-Type': 'application/json',
            })
        return self._parse_episode(data[0])
yt-dlp-2024.09.27/yt_dlp/extractor/popcorntimes.py000066400000000000000000000062431467563447100217620ustar00rootroot00000000000000import base64

from .common import InfoExtractor
from ..utils import int_or_none


class PopcorntimesIE(InfoExtractor):
    _VALID_URL = r'https?://popcorntimes\.tv/[^/]+/m/(?P<id>[^/]+)/(?P<display_id>[^/?#&]+)'
    _TEST = {
        'url': 'https://popcorntimes.tv/de/m/A1XCFvz/haensel-und-gretel-opera-fantasy',
        'md5': '93f210991ad94ba8c3485950a2453257',
        'info_dict': {
            'id': 'A1XCFvz',
            'display_id': 'haensel-und-gretel-opera-fantasy',
            'ext': 'mp4',
            'title': 'Hänsel und Gretel',
            'description': 'md5:1b8146791726342e7b22ce8125cf6945',
            'thumbnail': r're:^https?://.*\.jpg$',
            'creator': 'John Paul',
            'release_date': '19541009',
            'duration': 4260,
            'tbr': 5380,
            'width': 720,
            'height': 540,
        },
    }

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        video_id, display_id = mobj.group('id', 'display_id')

        webpage = self._download_webpage(url, display_id)

        title = self._search_regex(

            r'<h1>([^<]+)</h1>', webpage, 'title',
            default=None) or self._html_search_meta(
            'ya:ovs:original_name', webpage, 'title', fatal=True)

        loc = self._search_regex(
            r'PCTMLOC\s*=\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage, 'loc',
            group='value')

        # PCTMLOC is the video URL as ROT13-encoded base64: un-rotate the
        # letters, then base64-decode
        loc_b64 = ''
        for c in loc:
            c_ord = ord(c)
            if ord('a') <= c_ord <= ord('z') or ord('A') <= c_ord <= ord('Z'):
                upper = ord('Z') if c_ord <= ord('Z') else ord('z')
                c_ord += 13
                if upper < c_ord:
                    c_ord -= 26
            loc_b64 += chr(c_ord)

        video_url = base64.b64decode(loc_b64).decode('utf-8')

        description = self._html_search_regex(
            r'(?s)<div[^>]+class=["\']pt-movie-desc[^>]+>(.+?)</div>', webpage,
            'description', fatal=False)

        thumbnail = self._search_regex(
            r'<img[^>]+class=["\']video-preview[^>]+\bsrc=(["\'])(?P<value>(?:(?!\1).)+)\1',
            webpage, 'thumbnail', default=None,
            group='value') or self._og_search_thumbnail(webpage)

        creator = self._html_search_meta(
            'video:director', webpage, 'creator', default=None)

        release_date = self._html_search_meta(
            'video:release_date', webpage, default=None)
        if release_date:
            release_date = release_date.replace('-', '')

        def int_meta(name):
            return int_or_none(self._html_search_meta(
                name, webpage, default=None))

        return {
            'id': video_id,
            'display_id': display_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'creator': creator,
            'release_date': release_date,
            'duration': int_meta('video:duration'),
            'tbr': int_meta('ya:ovs:bitrate'),
            'width': int_meta('og:video:width'),
            'height': int_meta('og:video:height'),
            'http_headers': {
                'Referer': url,
            },
        }
yt-dlp-2024.09.27/yt_dlp/extractor/popcorntv.py000066400000000000000000000051071467563447100212700ustar00rootroot00000000000000from .common import InfoExtractor
from ..utils import (
    extract_attributes,
    int_or_none,
    unified_timestamp,
)


class PopcornTVIE(InfoExtractor):
    _VALID_URL = r'https?://[^/]+\.popcorntv\.it/guarda/(?P<display_id>[^/]+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://animemanga.popcorntv.it/guarda/food-wars-battaglie-culinarie-episodio-01/9183',
        'md5': '47d65a48d147caf692ab8562fe630b45',
        'info_dict': {
            'id': '9183',
            'display_id': 'food-wars-battaglie-culinarie-episodio-01',
            'ext': 'mp4',
            'title': 'Food Wars, Battaglie Culinarie | Episodio 01',
            'description': 'md5:b8bea378faae4651d3b34c6e112463d0',
            'thumbnail': r're:^https?://.*\.jpg$',
            'timestamp': 1497610857,
            'upload_date': '20170616',
            'duration': 1440,
            'view_count': int,
        },
    }, {
        'url': 'https://cinema.popcorntv.it/guarda/smash-cut/10433',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        display_id, video_id = mobj.group('display_id', 'id')

        webpage = self._download_webpage(url, display_id)

        m3u8_url = extract_attributes(
            self._search_regex(
                r'(<link[^>]+itemprop=["\'](?:content|embed)Url[^>]*>)',
                webpage, 'content',
            ))['href']

        formats = self._extract_m3u8_formats(
            m3u8_url, display_id, 'mp4', entry_protocol='m3u8_native',
            m3u8_id='hls')

        title = self._search_regex(
            r'<h1[^>]+itemprop=["\']name[^>]*>([^<]+)</h1>', webpage,
            'title', default=None) or self._og_search_title(webpage)

        description = self._html_search_regex(
            r'(?s)<article[^>]+itemprop=["\']description[^>]*>(.+?)</article>',
            webpage, 'description', fatal=False)

        thumbnail = self._og_search_thumbnail(webpage)
        timestamp = unified_timestamp(self._html_search_meta(
            'uploadDate', webpage, 'timestamp'))
        # 'duration' meta is given in minutes; invscale=60 converts to seconds
        duration = int_or_none(self._html_search_meta(
            'duration', webpage), invscale=60)
        view_count = int_or_none(self._html_search_meta(
            'interactionCount', webpage, 'view count'))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'duration': duration,
            'view_count': view_count,
            'formats': formats,
        }
yt-dlp-2024.09.27/yt_dlp/extractor/pornbox.py000066400000000000000000000106201467563447100207210ustar00rootroot00000000000000from .common import InfoExtractor
from ..compat import functools
from ..utils import (
    int_or_none,
    parse_duration,
    parse_iso8601,
    qualities,
    str_or_none,
    traverse_obj,
    url_or_none,
)


class PornboxIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?pornbox\.com/application/watch-page/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://pornbox.com/application/watch-page/212108',
        'md5': '3ff6b6e206f263be4c5e987a3162ac6e',
        'info_dict': {
            'id': '212108',
            'ext': 'mp4',
            'title': 'md5:ececc5c6e6c9dd35d290c45fed05fd49',
            'uploader': 'Lily Strong',
            'timestamp': 1665871200,
            'upload_date': '20221015',
            'age_limit': 18,
            'availability': 'needs_auth',
            'duration': 1505,
            'cast': ['Lily Strong', 'John Strong'],
            'tags': 'count:11',
            'description': 'md5:589c7f33e183aa8aa939537300efb859',
            'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$',
        },
    }, {
        'url': 'https://pornbox.com/application/watch-page/216045',
        'info_dict': {
            'id': '216045',
            'title': 'md5:3e48528e73a9a2b12f7a2772ed0b26a2',
            'description': 'md5:3e631dcaac029f15ed434e402d1b06c7',
            'uploader': 'VK Studio',
            'timestamp': 1618264800,
            'upload_date': '20210412',
            'age_limit': 18,
            'availability': 'premium_only',
            'duration': 2710,
            'cast': 'count:3',
            'tags': 'count:29',
            'thumbnail': r're:^https?://cdn-image\.gtflixtv\.com.*\.jpg.*$',
            'subtitles': 'count:6',
        },
        'params': {
            'skip_download': True,
            'ignore_no_formats_error': True,
        },
        'expected_warnings': [
            'You are either not logged in or do not have access to this scene',
            'No video formats found', 'Requested format is not available'],
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        public_data = self._download_json(f'https://pornbox.com/contents/{video_id}', video_id)

        subtitles = {country_code: [{
            'url': f'https://pornbox.com/contents/{video_id}/subtitles/{country_code}',
            'ext': 'srt',
        }] for country_code in traverse_obj(public_data, ('subtitles', ..., {str}))}

        is_free_scene = traverse_obj(
            public_data, ('price', 'is_available_for_free', {bool}), default=False)

        metadata = {
            'id': video_id,
            **traverse_obj(public_data, {
                'title': ('scene_name', {str.strip}),
                'description': ('small_description', {str.strip}),
                'uploader': 'studio',
                'duration': ('runtime', {parse_duration}),
                'cast': (('models', 'male_models'), ..., 'model_name'),
                'thumbnail': ('player_poster', {url_or_none}),
                'tags': ('niches', ..., 'niche'),
            }),
            'age_limit': 18,
            'timestamp': parse_iso8601(traverse_obj(
                public_data, ('studios', 'release_date'), 'publish_date')),
            'availability': self._availability(needs_auth=True, needs_premium=not is_free_scene),
            'subtitles': subtitles,
        }

        # a scene is accessible when it is purchased OR free, so bail out only
        # when it is neither
        if not public_data.get('is_purchased') and not is_free_scene:
            self.raise_login_required(
                'You are either not logged in or do not have access to this scene', metadata_available=True)
            return metadata

        media_id = traverse_obj(public_data, (
            'medias', lambda _, v: v['title'] == 'Full video', 'media_id', {int}), get_all=False)
        if not media_id:
            self.raise_no_formats('Could not find stream id', video_id=video_id)

        stream_data = self._download_json(
            f'https://pornbox.com/media/{media_id}/stream', video_id=video_id, note='Getting manifest urls')

        get_quality = qualities(['web', 'vga', 'hd', '1080p', '4k', '8k'])
        metadata['formats'] = traverse_obj(stream_data, ('qualities', lambda _, v: v['src'], {
            'url': 'src',
            'vbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
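            # scale=1000 makes int_or_none divide the raw 'bitrate' field by
            # 1000; vbr is expected in kbps, so the API presumably reports bps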
            'format_id': ('quality', {str_or_none}),
            'quality': ('quality', {get_quality}),
            'width': ('size', {lambda x: int(x[:-1])}),
        }))

        return metadata
yt-dlp-2024.09.27/yt_dlp/extractor/pornflip.py000066400000000000000000000066221467563447100210720ustar00rootroot00000000000000from .common import InfoExtractor
from ..utils import int_or_none, parse_duration, parse_iso8601


class PornFlipIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?pornflip\.com/(?:(embed|sv|v)/)?(?P<id>[^/]+)'
    _TESTS = [
        {
            'url': 'https://www.pornflip.com/dzv9Mtw1qj2/sv/brazzers-double-dare-two-couples-fucked-jenna-reid-maya-bijou',
            'info_dict': {
                'id': 'dzv9Mtw1qj2',
                'ext': 'mp4',
                'title': 'Brazzers - Double Dare Two couples fucked Jenna Reid Maya Bijou',
                'description': 'md5:d2b69e6cc743c5fd158e162aa7f05821',
                'duration': 476,
                'like_count': int,
                'dislike_count': int,
                'view_count': int,
                'timestamp': 1617846819,
                'upload_date': '20210408',
                'uploader': 'Brazzers',
                'age_limit': 18,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://www.pornflip.com/v/IrJEC40i21L',
            'only_matching': True,
        },
        {
            'url': 'https://www.pornflip.com/Z3jzbChC5-P/sexintaxi-e-sereyna-gomez-czech-naked-couple',
            'only_matching': True,
        },
        {
            'url': 'https://www.pornflip.com/embed/bLcDFxnrZnU',
            'only_matching': True,
        },
    ]
    _HOST = 'www.pornflip.com'

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            f'https://{self._HOST}/sv/{video_id}', video_id, headers={'host': self._HOST})
        description = self._html_search_regex(r'&amp;p\[summary\]=(.*?)\s*&amp;p', webpage, 'description', fatal=False)
        duration = self._search_regex(r'"duration":\s+"([^"]+)",', webpage, 'duration', fatal=False)
        view_count = self._search_regex(r'"interactionCount":\s+"([^"]+)"', webpage, 'view_count', fatal=False)
        title = self._html_search_regex(r'id="mediaPlayerTitleLink"[^>]*>(.+)</a>', webpage, 'title', fatal=False)
        uploader = self._html_search_regex(r'class="title-chanel"[^>]*>[^<]*<a[^>]*>([^<]+)<', webpage, 'uploader', fatal=False)
        upload_date = self._search_regex(r'"uploadDate":\s+"([^"]+)",', webpage, 'upload_date', fatal=False)
        likes = self._html_search_regex(
            r'class="btn btn-up-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^>]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage,
            'like_count', fatal=False)
        dislikes = self._html_search_regex(
            r'class="btn btn-down-rating[^>]*>[^<]*<i[^>]*>[^<]*</i>[^>]*<span[^>]*>[^0-9]*([0-9]+)[^<0-9]*<', webpage,
            'dislike_count', fatal=False)
        mpd_url = self._search_regex(
            r'"([^"]+userscontent.net/dash/[0-9]+/manifest.mpd[^"]*)"', webpage, 'mpd_url').replace('&amp;', '&')
        formats = self._extract_mpd_formats(mpd_url, video_id, mpd_id='dash')

        return {
            'age_limit': 18,
            'description': description,
            'dislike_count': int_or_none(dislikes),
            'duration': parse_duration(duration),
            'formats': formats,
            'id': video_id,
            'like_count': int_or_none(likes),
            'timestamp': parse_iso8601(upload_date),
            'thumbnail': self._og_search_thumbnail(webpage),
            'title': title,
            'uploader': uploader,
            'view_count': int_or_none(view_count),
        }
yt-dlp-2024.09.27/yt_dlp/extractor/pornhub.py000066400000000000000000000752401467563447100207140ustar00rootroot00000000000000import functools
import itertools
import math
import operator
import re

from .common import InfoExtractor
from .openload import PhantomJSwrapper
from ..networking import Request
from ..networking.exceptions import HTTPError
from ..utils import (
    NO_DEFAULT,
    ExtractorError,
    clean_html,
    determine_ext,
    format_field,
    int_or_none,
    merge_dicts,
    orderedSet,
    remove_quotes,
    remove_start,
    str_to_int,
    update_url_query,
    url_or_none,
    urlencode_postdata,
)


class PornHubBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'pornhub'
    _PORNHUB_HOST_RE = r'(?:(?P<host>pornhub(?:premium)?\.(?:com|net|org))|pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd\.onion)'

    def _download_webpage_handle(self, *args, **kwargs):
        def dl(*args, **kwargs):
            return super(PornHubBaseIE, self)._download_webpage_handle(*args, **kwargs)

        ret = dl(*args, **kwargs)

        if not ret:
            return ret

        webpage, urlh = ret

        # the site may serve an anti-bot challenge page (RNKEY cookie) instead
        # of the content; solve it with PhantomJS and retry the request
        if any(re.search(p, webpage) for p in (
                r'<body\b[^>]+\bonload=["\']go\(\)',
                r'document\.cookie\s*=\s*["\']RNKEY=',
                r'document\.location\.reload\(true\)')):
            url_or_request = args[0]
            url = (url_or_request.url
                   if isinstance(url_or_request, Request)
                   else url_or_request)
            phantom = PhantomJSwrapper(self, required_version='2.0')
            phantom.get(url, html=webpage)
            webpage, urlh = dl(*args, **kwargs)

        return webpage, urlh

    def _real_initialize(self):
        self._logged_in = False

    def _set_age_cookies(self, host):
        self._set_cookie(host, 'age_verified', '1')
        self._set_cookie(host, 'accessAgeDisclaimerPH', '1')
        self._set_cookie(host, 'accessAgeDisclaimerUK', '1')
        self._set_cookie(host, 'accessPH', '1')

    def _login(self, host):
        if self._logged_in:
            return

        site = host.split('.')[0]

        # Both sites pornhub and pornhubpremium have separate accounts
        # so there should be an option to provide credentials for both.
        # At the same time some videos are available under the same video id
        # on both sites so that we have to identify them as the same video.
        # For that purpose we have to keep both in the same extractor
        # but under different netrc machines.
        username, password = self._get_login_info(netrc_machine=site)
        if username is None:
            return

        login_url = 'https://www.{}/{}login'.format(host, 'premium/' if 'premium' in host else '')
        login_page = self._download_webpage(
            login_url, None, f'Downloading {site} login page')

        def is_logged(webpage):
            return any(re.search(p, webpage) for p in (
                r'id="profileMenuDropdown"',
                r'class="ph-icon-logout"'))

        if is_logged(login_page):
            self._logged_in = True
            return

        login_form = self._hidden_inputs(login_page)

        login_form.update({
            'email': username,
            'password': password,
        })

        response = self._download_json(
            f'https://www.{host}/front/authenticate', None,
            f'Logging in to {site}',
            data=urlencode_postdata(login_form),
            headers={
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': login_url,
                'X-Requested-With': 'XMLHttpRequest',
            })

        if response.get('success') == '1':
            self._logged_in = True
            return

        message = response.get('message')
        if message is not None:
            raise ExtractorError(
                f'Unable to login: {message}', expected=True)

        raise ExtractorError('Unable to log in')


class PornHubIE(PornHubBaseIE):
    IE_DESC = 'PornHub and Thumbzilla'
    _VALID_URL = rf'''(?x)
                    https?://
                        (?:
                            (?:[^/]+\.)?
                            {PornHubBaseIE._PORNHUB_HOST_RE}
                            /(?:(?:view_video\.php|video/show)\?viewkey=|embed/)|
                            (?:www\.)?thumbzilla\.com/video/
                        )
                        (?P<id>[\da-z]+)
                    '''
    _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//(?:www\.)?pornhub(?:premium)?\.(?:com|net|org)/embed/[\da-z]+)']
    _TESTS = [{
        'url': 'http://www.pornhub.com/view_video.php?viewkey=648719015',
        'md5': 'a6391306d050e4547f62b3f485dd9ba9',
        'info_dict': {
            'id': '648719015',
            'ext': 'mp4',
            'title': 'Seductive Indian beauty strips down and fingers her pink pussy',
            'uploader': 'Babes',
            'upload_date': '20130628',
            'timestamp': 1372447216,
            'duration': 361,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'age_limit': 18,
            'tags': list,
            'categories': list,
            'cast': list,
        },
    }, {
        # non-ASCII title
        'url': 'http://www.pornhub.com/view_video.php?viewkey=1331683002',
        'info_dict': {
            'id': '1331683002',
            'ext': 'mp4',
            'title': '重庆婷婷女王足交',
            'upload_date': '20150213',
            'timestamp': 1423804862,
            'duration': 1753,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'age_limit': 18,
            'tags': list,
            'categories': list,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Video has been flagged for verification in accordance with our trust and safety policy',
    }, {
        # subtitles
        'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5af5fef7c2aa7',
        'info_dict': {
            'id': 'ph5af5fef7c2aa7',
            'ext': 'mp4',
            'title': 'BFFS - Cute Teen Girls Share Cock On the Floor',
            'uploader': 'BFFs',
            'duration': 622,
            'view_count': int,
            'like_count': int,
            'dislike_count': int,
            'comment_count': int,
            'age_limit': 18,
            'tags': list,
            'categories': list,
            'subtitles': {
                'en': [{
                    'ext': 'srt',
                }],
            },
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'This video has been disabled',
    }, {
        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph601dc30bae19a',
        'info_dict': {
            'id': 'ph601dc30bae19a',
            'uploader': 'Projekt Melody',
            'uploader_id': 'projekt-melody',
            'upload_date': '20210205',
            'title': '"Welcome to My Pussy Mansion" - CB Stream (02/03/21)',
            'thumbnail': r're:https?://.+',
        },
    }, {
        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph557bbb6676d2d',
        'only_matching': True,
    }, {
        # removed at the request of cam4.com
        'url': 'http://fr.pornhub.com/view_video.php?viewkey=ph55ca2f9760862',
        'only_matching': True,
    }, {
        # removed at the request of the copyright owner
        'url': 'http://www.pornhub.com/view_video.php?viewkey=788152859',
        'only_matching': True,
    }, {
        # removed by uploader
        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph572716d15a111',
        'only_matching': True,
    }, {
        # private video
        'url': 'http://www.pornhub.com/view_video.php?viewkey=ph56fd731fce6b7',
        'only_matching': True,
    }, {
        'url': 'https://www.thumbzilla.com/video/ph56c6114abd99a/horny-girlfriend-sex',
        'only_matching': True,
    }, {
        'url': 'http://www.pornhub.com/video/show?viewkey=648719015',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.net/view_video.php?viewkey=203640933',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhub.org/view_video.php?viewkey=203640933',
        'only_matching': True,
    }, {
        'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5e4acdae54a82',
        'only_matching': True,
    }, {
        # Some videos are available with the same id on both premium
        # and non-premium sites (e.g.
this and the following test) 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5f75b0f4b18e3', 'only_matching': True, }, { 'url': 'https://www.pornhubpremium.com/view_video.php?viewkey=ph5f75b0f4b18e3', 'only_matching': True, }, { # geo restricted 'url': 'https://www.pornhub.com/view_video.php?viewkey=ph5a9813bfa7156', 'only_matching': True, }, { 'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/view_video.php?viewkey=ph5a9813bfa7156', 'only_matching': True, }] def _extract_count(self, pattern, webpage, name): return str_to_int(self._search_regex(pattern, webpage, f'{name} count', default=None)) def _real_extract(self, url): mobj = self._match_valid_url(url) host = mobj.group('host') or 'pornhub.com' video_id = mobj.group('id') self._login(host) self._set_age_cookies(host) def dl_webpage(platform): self._set_cookie(host, 'platform', platform) return self._download_webpage( f'https://www.{host}/view_video.php?viewkey={video_id}', video_id, f'Downloading {platform} webpage') webpage = dl_webpage('pc') error_msg = self._html_search_regex( (r'(?s)]+class=(["\'])(?:(?!\1).)*\b(?:removed|userMessageSection)\b(?:(?!\1).)*\1[^>]*>(?P.+?)', r'(?s)]+class=["\']noVideo["\'][^>]*>(?P.+?)'), webpage, 'error message', default=None, group='error') if error_msg: error_msg = re.sub(r'\s+', ' ', error_msg) raise ExtractorError( f'PornHub said: {error_msg}', expected=True, video_id=video_id) if any(re.search(p, webpage) for p in ( r'class=["\']geoBlocked["\']', r'>\s*This content is unavailable in your country')): self.raise_geo_restricted() # video_title from flashvars contains whitespace instead of non-ASCII (see # http://www.pornhub.com/view_video.php?viewkey=1331683002), not relying # on that anymore. title = self._html_search_meta( 'twitter:title', webpage, default=None) or self._html_search_regex( (r'(?s)]+class=["\']title["\'][^>]*>(?P.+?)</h1>', r'<div[^>]+data-video-title=(["\'])(?P<title>(?:(?!\1).)+)\1', r'shareTitle["\']\s*[=:]\s*(["\'])(?P<title>(?:(?!\1).)+)\1'), webpage, 'title', group='title') video_urls = [] video_urls_set = set() subtitles = {} flashvars = self._parse_json( self._search_regex( r'var\s+flashvars_\d+\s*=\s*({.+?});', webpage, 'flashvars', default='{}'), video_id) if flashvars: subtitle_url = url_or_none(flashvars.get('closedCaptionsFile')) if subtitle_url: subtitles.setdefault('en', []).append({ 'url': subtitle_url, 'ext': 'srt', }) thumbnail = flashvars.get('image_url') duration = int_or_none(flashvars.get('video_duration')) media_definitions = flashvars.get('mediaDefinitions') if isinstance(media_definitions, list): for definition in media_definitions: if not isinstance(definition, dict): continue video_url = definition.get('videoUrl') if not video_url or not isinstance(video_url, str): continue if video_url in video_urls_set: continue video_urls_set.add(video_url) video_urls.append( (video_url, int_or_none(definition.get('quality')))) else: thumbnail, duration = [None] * 2 def extract_js_vars(webpage, pattern, default=NO_DEFAULT): assignments = self._search_regex( pattern, webpage, 'encoded url', default=default) if not assignments: return {} assignments = assignments.split(';') js_vars = {} def parse_js_value(inp): inp = re.sub(r'/\*(?:(?!\*/).)*?\*/', '', inp) if '+' in inp: inps = inp.split('+') return functools.reduce( operator.concat, map(parse_js_value, inps)) inp = inp.strip() if inp in js_vars: return js_vars[inp] return remove_quotes(inp) for assn in assignments: assn = assn.strip() if not assn: continue assn = re.sub(r'var\s+', 
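                    # strip the leading 'var ' keyword so only 'name=value' remains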
'', assn) vname, value = assn.split('=', 1) js_vars[vname] = parse_js_value(value) return js_vars def add_video_url(video_url): v_url = url_or_none(video_url) if not v_url: return if v_url in video_urls_set: return video_urls.append((v_url, None)) video_urls_set.add(v_url) def parse_quality_items(quality_items): q_items = self._parse_json(quality_items, video_id, fatal=False) if not isinstance(q_items, list): return for item in q_items: if isinstance(item, dict): add_video_url(item.get('url')) if not video_urls: FORMAT_PREFIXES = ('media', 'quality', 'qualityItems') js_vars = extract_js_vars( webpage, r'(var\s+(?:{})_.+)'.format('|'.join(FORMAT_PREFIXES)), default=None) if js_vars: for key, format_url in js_vars.items(): if key.startswith(FORMAT_PREFIXES[-1]): parse_quality_items(format_url) elif any(key.startswith(p) for p in FORMAT_PREFIXES[:2]): add_video_url(format_url) if not video_urls and re.search( r'<[^>]+\bid=["\']lockedPlayer', webpage): raise ExtractorError( f'Video {video_id} is locked', expected=True) if not video_urls: js_vars = extract_js_vars( dl_webpage('tv'), r'(var.+?mediastring.+?)</script>') add_video_url(js_vars['mediastring']) for mobj in re.finditer( r'<a[^>]+\bclass=["\']downloadBtn\b[^>]+\bhref=(["\'])(?P<url>(?:(?!\1).)+)\1', webpage): video_url = mobj.group('url') if video_url not in video_urls_set: video_urls.append((video_url, None)) video_urls_set.add(video_url) upload_date = None formats = [] def add_format(format_url, height=None): ext = determine_ext(format_url) if ext == 'mpd': formats.extend(self._extract_mpd_formats( format_url, video_id, mpd_id='dash', fatal=False)) return if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( format_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) return if not height: height = int_or_none(self._search_regex( r'(?P<height>\d+)[pP]?_\d+[kK]', format_url, 'height', default=None)) formats.append({ 'url': format_url, 'format_id': format_field(height, None, '%dp'), 'height': height, }) for video_url, height in video_urls: if not upload_date: upload_date = self._search_regex( r'/(\d{6}/\d{2})/', video_url, 'upload data', default=None) if upload_date: upload_date = upload_date.replace('/', '') if '/video/get_media' in video_url: medias = self._download_json(video_url, video_id, fatal=False) if isinstance(medias, list): for media in medias: if not isinstance(media, dict): continue video_url = url_or_none(media.get('videoUrl')) if not video_url: continue height = int_or_none(media.get('quality')) add_format(video_url, height) continue add_format(video_url) model_profile = self._search_json( r'var\s+MODEL_PROFILE\s*=', webpage, 'model profile', video_id, fatal=False) video_uploader = self._html_search_regex( r'(?s)From: .+?<(?:a\b[^>]+\bhref=["\']/(?:(?:user|channel)s|model|pornstar)/|span\b[^>]+\bclass=["\']username)[^>]+>(.+?)<', webpage, 'uploader', default=None) or model_profile.get('username') def extract_vote_count(kind, name): return self._extract_count( (rf'<span[^>]+\bclass="votes{kind}"[^>]*>([\d,\.]+)</span>', rf'<span[^>]+\bclass=["\']votes{kind}["\'][^>]*\bdata-rating=["\'](\d+)'), webpage, name) view_count = self._extract_count( r'<span class="count">([\d,\.]+)</span> [Vv]iews', webpage, 'view') like_count = extract_vote_count('Up', 'like') dislike_count = extract_vote_count('Down', 'dislike') comment_count = self._extract_count( r'All Comments\s*<span>\(([\d,.]+)\)', webpage, 'comment') def extract_list(meta_key): div = self._search_regex( 
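            # e.g. meta_key='tags' matches the page's '...tagsWrapper' container;
            # the anchor texts inside it become the tags/categories/cast lists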
rf'(?s)<div[^>]+\bclass=["\'].*?\b{meta_key}Wrapper[^>]*>(.+?)</div>', webpage, meta_key, default=None) if div: return [clean_html(x).strip() for x in re.findall(r'(?s)<a[^>]+\bhref=[^>]+>.+?</a>', div)] info = self._search_json_ld(webpage, video_id, default={}) # description provided in JSON-LD is irrelevant info['description'] = None return merge_dicts({ 'id': video_id, 'uploader': video_uploader, 'uploader_id': remove_start(model_profile.get('modelProfileLink'), '/model/'), 'upload_date': upload_date, 'title': title, 'thumbnail': thumbnail, 'duration': duration, 'view_count': view_count, 'like_count': like_count, 'dislike_count': dislike_count, 'comment_count': comment_count, 'formats': formats, 'age_limit': 18, 'tags': extract_list('tags'), 'categories': extract_list('categories'), 'cast': extract_list('pornstars'), 'subtitles': subtitles, }, info) class PornHubPlaylistBaseIE(PornHubBaseIE): def _extract_page(self, url): return int_or_none(self._search_regex( r'\bpage=(\d+)', url, 'page', default=None)) def _extract_entries(self, webpage, host): # Only process container div with main playlist content skipping # drop-down menu that uses similar pattern for videos (see # https://github.com/ytdl-org/youtube-dl/issues/11594). container = self._search_regex( r'(?s)(<div[^>]+class=["\']container.+)', webpage, 'container', default=webpage) return [ self.url_result( f'http://www.{host}/{video_url}', PornHubIE.ie_key(), video_title=title) for video_url, title in orderedSet(re.findall( r'href="/?(view_video\.php\?.*\bviewkey=[\da-z]+[^"]*)"[^>]*\s+title="([^"]+)"', container)) ] class PornHubUserIE(PornHubPlaylistBaseIE): _VALID_URL = rf'(?P<url>https?://(?:[^/]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/?#&]+))(?:[?#&]|/(?!videos)|$)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph', 'playlist_mincount': 118, }, { 'url': 'https://www.pornhub.com/pornstar/liz-vicious', 'info_dict': { 'id': 'liz-vicious', }, 'playlist_mincount': 118, }, { 'url': 'https://www.pornhub.com/users/russianveet69', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/channels/povd', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/model/zoe_ph?abc=1', 'only_matching': True, }, { # Unavailable via /videos page, but available with direct pagination # on pornstar page (see [1]), requires premium # 1. 
https://github.com/ytdl-org/youtube-dl/issues/27853 'url': 'https://www.pornhubpremium.com/pornstar/sienna-west', 'only_matching': True, }, { # Same as before, multi page 'url': 'https://www.pornhubpremium.com/pornstar/lily-labeau', 'only_matching': True, }, { 'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph', 'only_matching': True, }] def _real_extract(self, url): mobj = self._match_valid_url(url) user_id = mobj.group('id') videos_url = '{}/videos'.format(mobj.group('url')) self._set_age_cookies(mobj.group('host')) page = self._extract_page(url) if page: videos_url = update_url_query(videos_url, {'page': page}) return self.url_result( videos_url, ie=PornHubPagedVideoListIE.ie_key(), video_id=user_id) class PornHubPagedPlaylistBaseIE(PornHubPlaylistBaseIE): @staticmethod def _has_more(webpage): return re.search( r'''(?x) <li[^>]+\bclass=["\']page_next| <link[^>]+\brel=["\']next| <button[^>]+\bid=["\']moreDataBtn ''', webpage) is not None def _entries(self, url, host, item_id): page = self._extract_page(url) VIDEOS = '/videos' def download_page(base_url, num, fallback=False): note = 'Downloading page {}{}'.format(num, ' (switch to fallback)' if fallback else '') return self._download_webpage( base_url, item_id, note, query={'page': num}) def is_404(e): return isinstance(e.cause, HTTPError) and e.cause.status == 404 base_url = url has_page = page is not None first_page = page if has_page else 1 for page_num in (first_page, ) if has_page else itertools.count(first_page): try: try: webpage = download_page(base_url, page_num) except ExtractorError as e: # Some sources may not be available via /videos page, # trying to fallback to main page pagination (see [1]) # 1. https://github.com/ytdl-org/youtube-dl/issues/27853 if is_404(e) and page_num == first_page and VIDEOS in base_url: base_url = base_url.replace(VIDEOS, '') webpage = download_page(base_url, page_num, fallback=True) else: raise except ExtractorError as e: if is_404(e) and page_num != first_page: break raise page_entries = self._extract_entries(webpage, host) if not page_entries: break yield from page_entries if not self._has_more(webpage): break def _real_extract(self, url): mobj = self._match_valid_url(url) host = mobj.group('host') item_id = mobj.group('id') self._login(host) self._set_age_cookies(host) return self.playlist_result(self._entries(url, host, item_id), item_id) class PornHubPagedVideoListIE(PornHubPagedPlaylistBaseIE): _VALID_URL = rf'https?://(?:[^/]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/(?!playlist/)(?P<id>(?:[^/]+/)*[^/?#&]+)' _TESTS = [{ 'url': 'https://www.pornhub.com/model/zoe_ph/videos', 'only_matching': True, }, { 'url': 'http://www.pornhub.com/users/rushandlia/videos', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos', 'info_dict': { 'id': 'pornstar/jenny-blighe/videos', }, 'playlist_mincount': 149, }, { 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos?page=3', 'info_dict': { 'id': 'pornstar/jenny-blighe/videos', }, 'playlist_mincount': 40, }, { # default sorting as Top Rated Videos 'url': 'https://www.pornhub.com/channels/povd/videos', 'info_dict': { 'id': 'channels/povd/videos', }, 'playlist_mincount': 293, }, { # Top Rated Videos 'url': 'https://www.pornhub.com/channels/povd/videos?o=ra', 'only_matching': True, }, { # Most Recent Videos 'url': 'https://www.pornhub.com/channels/povd/videos?o=da', 'only_matching': True, }, { # Most Viewed Videos 'url': 'https://www.pornhub.com/channels/povd/videos?o=vi', 
'only_matching': True, }, { 'url': 'http://www.pornhub.com/users/zoe_ph/videos/public', 'only_matching': True, }, { # Most Viewed Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=mv', 'only_matching': True, }, { # Top Rated Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=tr', 'only_matching': True, }, { # Longest Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=lg', 'only_matching': True, }, { # Newest Videos 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos?o=cm', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/paid', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/pornstar/liz-vicious/videos/fanonly', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/video', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/video?page=3', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/video/search?search=123', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/categories/teen', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/categories/teen?page=3', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/hd', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/hd?page=3', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/described-video', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/described-video?page=2', 'only_matching': True, }, { 'url': 'https://www.pornhub.com/video/incategories/60fps-1/hd-porn', 'only_matching': True, }, { 'url': 'https://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/model/zoe_ph/videos', 'only_matching': True, }] @classmethod def suitable(cls, url): return (False if PornHubIE.suitable(url) or PornHubUserIE.suitable(url) or PornHubUserVideosUploadIE.suitable(url) else super().suitable(url)) class PornHubUserVideosUploadIE(PornHubPagedPlaylistBaseIE): _VALID_URL = rf'(?P<url>https?://(?:[^/]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/(?:(?:user|channel)s|model|pornstar)/(?P<id>[^/]+)/videos/upload)' _TESTS = [{ 'url': 'https://www.pornhub.com/pornstar/jenny-blighe/videos/upload', 'info_dict': { 'id': 'jenny-blighe', }, 'playlist_mincount': 129, }, { 'url': 'https://www.pornhub.com/model/zoe_ph/videos/upload', 'only_matching': True, }, { 'url': 'http://pornhubvybmsymdol4iibwgwtkpwmeyd6luq2gxajgjzfjvotyt5zhyd.onion/pornstar/jenny-blighe/videos/upload', 'only_matching': True, }] class PornHubPlaylistIE(PornHubPlaylistBaseIE): _VALID_URL = rf'(?P<url>https?://(?:[^/]+\.)?{PornHubBaseIE._PORNHUB_HOST_RE}/playlist/(?P<id>[^/?#&]+))' _TESTS = [{ 'url': 'https://www.pornhub.com/playlist/44121572', 'info_dict': { 'id': '44121572', }, 'playlist_count': 77, }, { 'url': 'https://www.pornhub.com/playlist/4667351', 'only_matching': True, }, { 'url': 'https://de.pornhub.com/playlist/4667351', 'only_matching': True, }, { 'url': 'https://de.pornhub.com/playlist/4667351?page=2', 'only_matching': True, }] def _entries(self, url, host, item_id): webpage = self._download_webpage(url, item_id, 'Downloading page 1') playlist_id = self._search_regex(r'var\s+playlistId\s*=\s*"([^"]+)"', webpage, 'playlist_id') video_count = int_or_none( self._search_regex(r'var\s+itemsCount\s*=\s*([0-9]+)\s*\|\|', webpage, 'video_count')) token = self._search_regex(r'var\s+token\s*=\s*"([^"]+)"', webpage, 'token') page_count = math.ceil((video_count - 36) / 40.) 
+ 1 page_entries = self._extract_entries(webpage, host) def download_page(page_num): note = f'Downloading page {page_num}' page_url = f'https://www.{host}/playlist/viewChunked' return self._download_webpage(page_url, item_id, note, query={ 'id': playlist_id, 'page': page_num, 'token': token, }) for page_num in range(1, page_count + 1): if page_num > 1: webpage = download_page(page_num) page_entries = self._extract_entries(webpage, host) if not page_entries: break yield from page_entries def _real_extract(self, url): mobj = self._match_valid_url(url) host = mobj.group('host') item_id = mobj.group('id') self._login(host) self._set_age_cookies(host) return self.playlist_result(self._entries(mobj.group('url'), host, item_id), item_id) ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/pornotube.py�����������������������������������������������������0000664�0000000�0000000�00000006032�14675634471�0021251�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import json from .common import InfoExtractor from ..utils import int_or_none class PornotubeIE(InfoExtractor): _VALID_URL = r'https?://(?:\w+\.)?pornotube\.com/(?:[^?#]*?)/video/(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.pornotube.com/orientation/straight/video/4964/title/weird-hot-and-wet-science', 'md5': '60fc5a4f0d93a97968fc7999d98260c9', 'info_dict': { 'id': '4964', 'ext': 'mp4', 'upload_date': '20141203', 'title': 'Weird Hot and Wet Science', 'description': 'md5:a8304bef7ef06cb4ab476ca6029b01b0', 'categories': ['Adult Humor', 'Blondes'], 'uploader': 'Alpha Blue Archives', 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1417582800, 'age_limit': 18, }, } def _real_extract(self, url): video_id = self._match_id(url) token = self._download_json( 'https://api.aebn.net/auth/v2/origins/authenticate', video_id, note='Downloading token', data=json.dumps({'credentials': 'Clip Application'}).encode(), headers={ 'Content-Type': 'application/json', 'Origin': 'http://www.pornotube.com', })['tokenKey'] video_url = self._download_json( f'https://api.aebn.net/delivery/v1/clips/{video_id}/MP4', video_id, note='Downloading delivery information', headers={'Authorization': token})['mediaUrl'] FIELDS = ( 'title', 'description', 'startSecond', 'endSecond', 'publishDate', 'studios{name}', 'categories{name}', 'movieId', 'primaryImageNumber', ) info = self._download_json( 'https://api.aebn.net/content/v2/clips/{}?fields={}'.format(video_id, ','.join(FIELDS)), video_id, note='Downloading metadata', headers={'Authorization': token}) if isinstance(info, list): info = info[0] title = info['title'] timestamp = int_or_none(info.get('publishDate'), scale=1000) uploader = info.get('studios', [{}])[0].get('name') movie_id = info.get('movieId') primary_image_number = info.get('primaryImageNumber') thumbnail = None if movie_id and primary_image_number: thumbnail = 'http://pic.aebn.net/dis/t/%s/%s_%08d.jpg' % ( movie_id, movie_id, 
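                # URL pattern: /dis/t/<movie_id>/<movie_id>_<8-digit image number>.jpg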
primary_image_number) start = int_or_none(info.get('startSecond')) end = int_or_none(info.get('endSecond')) duration = end - start if start and end else None categories = [c['name'] for c in info.get('categories', []) if c.get('name')] return { 'id': video_id, 'url': video_url, 'title': title, 'description': info.get('description'), 'duration': duration, 'timestamp': timestamp, 'uploader': uploader, 'thumbnail': thumbnail, 'categories': categories, 'age_limit': 18, } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/pornovoisines.py�������������������������������������������������0000664�0000000�0000000�00000007514�14675634471�0022157�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( float_or_none, int_or_none, unified_strdate, ) class PornoVoisinesIE(InfoExtractor): _WORKING = False _VALID_URL = r'https?://(?:www\.)?pornovoisines\.com/videos/show/(?P<id>\d+)/(?P<display_id>[^/.]+)' _TEST = { 'url': 'http://www.pornovoisines.com/videos/show/919/recherche-appartement.html', 'md5': '6f8aca6a058592ab49fe701c8ba8317b', 'info_dict': { 'id': '919', 'display_id': 'recherche-appartement', 'ext': 'mp4', 'title': 'Recherche appartement', 'description': 'md5:fe10cb92ae2dd3ed94bb4080d11ff493', 'thumbnail': r're:^https?://.*\.jpg$', 'upload_date': '20140925', 'duration': 120, 'view_count': int, 'average_rating': float, 'categories': ['Débutante', 'Débutantes', 'Scénario', 'Sodomie'], 'age_limit': 18, 'subtitles': { 'fr': [{ 'ext': 'vtt', }], }, }, } def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') display_id = mobj.group('display_id') settings_url = self._download_json( f'http://www.pornovoisines.com/api/video/{video_id}/getsettingsurl/', video_id, note='Getting settings URL')['video_settings_url'] settings = self._download_json(settings_url, video_id)['data'] formats = [] for kind, data in settings['variants'].items(): if kind == 'HLS': formats.extend(self._extract_m3u8_formats( data, video_id, ext='mp4', entry_protocol='m3u8_native', m3u8_id='hls')) elif kind == 'MP4': for item in data: formats.append({ 'url': item['url'], 'height': item.get('height'), 'bitrate': item.get('bitrate'), }) webpage = self._download_webpage(url, video_id) title = self._og_search_title(webpage) description = self._og_search_description(webpage) # The webpage has a bug - there's no space between "thumb" and src= thumbnail = self._html_search_regex( r'<img[^>]+class=([\'"])thumb\1[^>]*src=([\'"])(?P<url>[^"]+)\2', webpage, 'thumbnail', fatal=False, group='url') upload_date = unified_strdate(self._search_regex( r'Le\s*<b>([\d/]+)', webpage, 'upload date', fatal=False)) duration = settings.get('main', {}).get('duration') view_count = int_or_none(self._search_regex( 
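            # 'vues' is French for 'views' (pornovoisines.com is a French-language site)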
r'(\d+) vues', webpage, 'view count', fatal=False)) average_rating = self._search_regex( r'Note\s*:\s*(\d+(?:,\d+)?)', webpage, 'average rating', fatal=False) if average_rating: average_rating = float_or_none(average_rating.replace(',', '.')) categories = self._html_search_regex( r'(?s)Catégories\s*:\s*<b>(.+?)</b>', webpage, 'categories', fatal=False) if categories: categories = [category.strip() for category in categories.split(',')] subtitles = {'fr': [{ 'url': subtitle, } for subtitle in settings.get('main', {}).get('vtt_tracks', {}).values()]} return { 'id': video_id, 'display_id': display_id, 'formats': formats, 'title': title, 'description': description, 'thumbnail': thumbnail, 'upload_date': upload_date, 'duration': duration, 'view_count': view_count, 'average_rating': average_rating, 'categories': categories, 'age_limit': 18, 'subtitles': subtitles, } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/pornoxo.py�������������������������������������������������������0000664�0000000�0000000�00000003561�14675634471�0020744�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( str_to_int, ) class PornoXOIE(InfoExtractor): _WORKING = False _VALID_URL = r'https?://(?:www\.)?pornoxo\.com/videos/(?P<id>\d+)/(?P<display_id>[^/]+)\.html' _TEST = { 'url': 'http://www.pornoxo.com/videos/7564/striptease-from-sexy-secretary.html', 'md5': '582f28ecbaa9e6e24cb90f50f524ce87', 'info_dict': { 'id': '7564', 'ext': 'flv', 'title': 'Striptease From Sexy Secretary!', 'display_id': 'striptease-from-sexy-secretary', 'description': 'md5:0ee35252b685b3883f4a1d38332f9980', 'categories': list, # NSFW 'thumbnail': r're:https?://.*\.jpg$', 'age_limit': 18, }, } def _real_extract(self, url): mobj = self._match_valid_url(url) video_id, display_id = mobj.groups() webpage = self._download_webpage(url, video_id) video_data = self._extract_jwplayer_data(webpage, video_id, require_title=False) title = self._html_search_regex( r'<title>([^<]+)\s*-\s*PornoXO', webpage, 'title') view_count = str_to_int(self._html_search_regex( r'[vV]iews:\s*([0-9,]+)', webpage, 'view count', fatal=False)) categories_str = self._html_search_regex( r'<meta name="description" content=".*featuring\s*([^"]+)"', webpage, 'categories', fatal=False) categories = ( None if categories_str is None else categories_str.split(',')) video_data.update({ 'id': video_id, 'title': title, 'display_id': display_id, 'description': self._html_search_meta('description', webpage), 'categories': categories, 'view_count': view_count, 'age_limit': 18, }) return video_data 
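# Usage note (illustrative): str_to_int() above tolerates thousands
# separators, so a scraped counter such as 'Views: 12,345' yields 12345,
# which is why the raw regex capture needs no manual cleanup here.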
�����������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/pr0gramm.py������������������������������������������������������0000664�0000000�0000000�00000017647�14675634471�0020777�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import datetime as dt import functools import json import urllib.parse from .common import InfoExtractor from ..utils import ( ExtractorError, float_or_none, int_or_none, make_archive_id, mimetype2ext, str_or_none, urljoin, ) from ..utils.traversal import traverse_obj class Pr0grammIE(InfoExtractor): _VALID_URL = r'https?://pr0gramm\.com\/(?:[^/?#]+/)+(?P<id>[\d]+)(?:[/?#:]|$)' _TESTS = [{ 'url': 'https://pr0gramm.com/new/video/5466437', 'info_dict': { 'id': '5466437', 'ext': 'mp4', 'title': 'pr0gramm-5466437 by g11st', 'tags': ['Neon Genesis Evangelion', 'Touhou Project', 'Fly me to the Moon', 'Marisad', 'Marisa Kirisame', 'video', 'sound', 'Marisa', 'Anime'], 'uploader': 'g11st', 'uploader_id': '394718', 'timestamp': 1671590240, 'upload_date': '20221221', 'like_count': int, 'dislike_count': int, 'age_limit': 0, 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', '_old_archive_ids': ['pr0grammstatic 5466437'], }, }, { 'url': 'https://pr0gramm.com/new/3052805:comment28391322', 'info_dict': { 'id': '3052805', 'ext': 'mp4', 'title': 'pr0gramm-3052805 by Hansking1', 'tags': 'count:15', 'uploader': 'Hansking1', 'uploader_id': '385563', 'timestamp': 1552930408, 'upload_date': '20190318', 'like_count': int, 'dislike_count': int, 'age_limit': 0, 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', '_old_archive_ids': ['pr0grammstatic 3052805'], }, }, { # Requires verified account 'url': 'https://pr0gramm.com/new/Gianna%20Michaels/5848332', 'info_dict': { 'id': '5848332', 'ext': 'mp4', 'title': 'pr0gramm-5848332 by erd0pfel', 'tags': 'count:18', 'uploader': 'erd0pfel', 'uploader_id': '349094', 'timestamp': 1694489652, 'upload_date': '20230912', 'like_count': int, 'dislike_count': int, 'age_limit': 18, 'thumbnail': r're:^https://thumb\.pr0gramm\.com/.*\.jpg', '_old_archive_ids': ['pr0grammstatic 5848332'], }, }, { 'url': 'https://pr0gramm.com/top/5895149', 'info_dict': { 'id': '5895149', 'ext': 'mp4', 'title': 'pr0gramm-5895149 by algoholigSeeManThrower', 'tags': 'count:19', 'uploader': 'algoholigSeeManThrower', 'uploader_id': '457556', 'timestamp': 1697580902, 'upload_date': '20231018', 'like_count': int, 'dislike_count': int, 'age_limit': 0, 'thumbnail': 'https://thumb.pr0gramm.com/2023/10/18/db47bb3db5e1a1b3.jpg', '_old_archive_ids': ['pr0grammstatic 5895149'], }, }, { 'url': 'https://pr0gramm.com/static/5466437', 'only_matching': True, }, { 'url': 'https://pr0gramm.com/new/rowan%20atkinson%20herr%20bohne/3052805', 'only_matching': True, }, { 'url': 'https://pr0gramm.com/user/froschler/dafur-ist-man-hier/5091290', 'only_matching': True, }] BASE_URL = 'https://pr0gramm.com' @functools.cached_property def _is_logged_in(self): return 'pp' in self._get_cookies(self.BASE_URL) @functools.cached_property def _maximum_flags(self): # We need to guess the flags for the content otherwise the api will raise an error # We can guess the maximum allowed 
flags for the account from the cookies # Bitflags are (msbf): pol, nsfp, nsfl, nsfw, sfw flags = 0b10001 if self._is_logged_in: flags |= 0b01000 cookies = self._get_cookies(self.BASE_URL) if 'me' not in cookies: self._download_webpage(self.BASE_URL, None, 'Refreshing verification information') if traverse_obj(cookies, ('me', {lambda x: x.value}, {urllib.parse.unquote}, {json.loads}, 'verified')): flags |= 0b00110 return flags def _call_api(self, endpoint, video_id, query={}, note='Downloading API json'): data = self._download_json( f'https://pr0gramm.com/api/items/{endpoint}', video_id, note, query=query, expected_status=403) error = traverse_obj(data, ('error', {str})) if error in ('nsfwRequired', 'nsflRequired', 'nsfpRequired', 'verificationRequired'): if not self._is_logged_in: self.raise_login_required() raise ExtractorError(f'Unverified account cannot access NSFW/NSFL ({error})', expected=True) elif error: message = traverse_obj(data, ('msg', {str})) or error raise ExtractorError(f'API returned error: {message}', expected=True) return data @staticmethod def _create_source_url(path): return urljoin('https://img.pr0gramm.com', path) def _real_extract(self, url): video_id = self._match_id(url) video_info = traverse_obj( self._call_api('get', video_id, {'id': video_id, 'flags': self._maximum_flags}), ('items', 0, {dict})) source = video_info.get('image') if not source or not source.endswith('mp4'): self.raise_no_formats('Could not extract a video', expected=bool(source), video_id=video_id) metadata = self._call_api('info', video_id, {'itemId': video_id}, note='Downloading tags') tags = traverse_obj(metadata, ('tags', ..., 'tag', {str})) # Sorted by "confidence", higher confidence = earlier in list confidences = traverse_obj(metadata, ('tags', ..., 'confidence', ({int}, {float}))) if confidences: tags = [tag for _, tag in sorted(zip(confidences, tags), reverse=True)] formats = traverse_obj(video_info, ('variants', ..., { 'format_id': ('name', {str}), 'url': ('path', {self._create_source_url}), 'ext': ('mimeType', {mimetype2ext}), 'vcodec': ('codec', {str}), 'width': ('width', {int_or_none}), 'height': ('height', {int_or_none}), 'bitrate': ('bitRate', {float_or_none}), 'filesize': ('fileSize', {int_or_none}), })) if video_info.get('variants') else [{ 'ext': 'mp4', 'format_id': 'source', **traverse_obj(video_info, { 'url': ('image', {self._create_source_url}), 'width': ('width', {int_or_none}), 'height': ('height', {int_or_none}), }), }] subtitles = {} for subtitle in traverse_obj(video_info, ('subtitles', lambda _, v: v['language'])): subtitles.setdefault(subtitle['language'], []).append(traverse_obj(subtitle, { 'url': ('path', {self._create_source_url}), 'note': ('label', {str}), })) return { 'id': video_id, 'title': f'pr0gramm-{video_id} by {video_info.get("user")}', 'tags': tags, 'formats': formats, 'subtitles': subtitles, 'age_limit': 18 if traverse_obj(video_info, ('flags', {0b110.__and__})) else 0, '_old_archive_ids': [make_archive_id('Pr0grammStatic', video_id)], **traverse_obj(video_info, { 'uploader': ('user', {str}), 'uploader_id': ('userId', {str_or_none}), 'like_count': ('up', {int}), 'dislike_count': ('down', {int}), 'timestamp': ('created', {int}), 'upload_date': ('created', {int}, {dt.date.fromtimestamp}, {lambda x: x.strftime('%Y%m%d')}), 'thumbnail': ('thumb', {lambda x: urljoin('https://thumb.pr0gramm.com', x)}), }), } 
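# Worked example of the flag bitmask used by _maximum_flags above (bit order
# per the in-code comment, msb->lsb: pol, nsfp, nsfl, nsfw, sfw) -- a sketch
# for illustration, not part of any documented API contract:
#
#   anonymous visitor:       0b10001 (pol|sfw)                  == 17
#   logged in, unverified:   0b10001 | 0b01000 (adds nsfp)      == 25
#   logged in and verified:  0b11001 | 0b00110 (adds nsfl|nsfw) == 31
#
# so e.g. a verified account queries the items API with flags=31.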
�����������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/prankcast.py�����������������������������������������������������0000664�0000000�0000000�00000013144�14675634471�0021224�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import json from .common import InfoExtractor from ..utils import float_or_none, parse_iso8601, str_or_none, try_call from ..utils.traversal import traverse_obj class PrankCastIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/showreel/(?P<id>\d+)-(?P<display_id>[^/?#]+)' _TESTS = [{ 'url': 'https://prankcast.com/Devonanustart/showreel/1561-Beverly-is-back-like-a-heart-attack-', 'info_dict': { 'id': '1561', 'ext': 'mp3', 'title': 'Beverly is back like a heart attack!', 'display_id': 'Beverly-is-back-like-a-heart-attack-', 'timestamp': 1661391575, 'uploader': 'Devonanustart', 'channel_id': '4', 'duration': 7918, 'cast': ['Devonanustart', 'Phonelosers'], 'description': '', 'categories': ['prank'], 'tags': ['prank call', 'prank', 'live show'], 'upload_date': '20220825', }, }, { 'url': 'https://prankcast.com/phonelosers/showreel/2048-NOT-COOL', 'info_dict': { 'id': '2048', 'ext': 'mp3', 'title': 'NOT COOL', 'display_id': 'NOT-COOL', 'timestamp': 1665028364, 'uploader': 'phonelosers', 'channel_id': '6', 'duration': 4044, 'cast': ['phonelosers'], 'description': '', 'categories': ['prank'], 'tags': ['prank call', 'prank', 'live show'], 'upload_date': '20221006', }, }] def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).group('id', 'display_id') webpage = self._download_webpage(url, video_id) json_info = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_showreel'] uploader = json_info.get('user_name') guests_json = self._parse_json(json_info.get('guests_json') or '{}', video_id) start_date = parse_iso8601(json_info.get('start_date')) return { 'id': video_id, 'title': json_info.get('broadcast_title') or self._og_search_title(webpage), 'display_id': display_id, 'url': f'{json_info["broadcast_url"]}{json_info["recording_hash"]}.mp3', 'timestamp': start_date, 'uploader': uploader, 'channel_id': str_or_none(json_info.get('user_id')), 'duration': try_call(lambda: parse_iso8601(json_info['end_date']) - start_date), 'cast': list(filter(None, [uploader, *traverse_obj(guests_json, (..., 'name'))])), 'description': json_info.get('broadcast_description'), 'categories': [json_info.get('broadcast_category')], 'tags': try_call(lambda: json_info['broadcast_tags'].split(',')), } class PrankCastPostIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?prankcast\.com/[^/?#]+/posts/(?P<id>\d+)-(?P<display_id>[^/?#]+)' _TESTS = [{ 'url': 'https://prankcast.com/devonanustart/posts/6214-happy-national-rachel-day-', 'info_dict': { 'id': '6214', 'ext': 'mp3', 'title': 'Happy National Rachel Day!', 'display_id': 'happy-national-rachel-day-', 'timestamp': 1704333938, 'uploader': 'Devonanustart', 'channel_id': '4', 'duration': 13175, 'cast': ['Devonanustart'], 'description': '', 'categories': ['prank call'], 'upload_date': '20240104', }, }, { 'url': 'https://prankcast.com/despicabledogs/posts/6217-jake-the-work-crow-', 'info_dict': { 'id': '6217', 'ext': 
'mp3', 'title': 'Jake the Work Crow!', 'display_id': 'jake-the-work-crow-', 'timestamp': 1704346592, 'uploader': 'despicabledogs', 'channel_id': '957', 'duration': 263.287, 'cast': ['despicabledogs'], 'description': 'https://imgur.com/a/vtxLvKU', 'categories': [], 'upload_date': '20240104', }, }] def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).group('id', 'display_id') webpage = self._download_webpage(url, video_id) post = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['ssr_data_posts'] content = self._parse_json(post['post_contents_json'], video_id)[0] uploader = post.get('user_name') guests_json = traverse_obj(content, ('guests_json', {json.loads}, {dict})) or {} return { 'id': video_id, 'title': post.get('post_title') or self._og_search_title(webpage), 'display_id': display_id, 'url': content.get('url'), 'timestamp': parse_iso8601(content.get('start_date') or content.get('crdate'), ' '), 'uploader': uploader, 'channel_id': str_or_none(post.get('user_id')), 'duration': float_or_none(content.get('duration')), 'cast': list(filter(None, [uploader, *traverse_obj(guests_json, (..., 'name'))])), 'description': post.get('post_body'), 'categories': list(filter(None, [content.get('category')])), 'tags': try_call(lambda: list(filter('', post['post_tags'].split(',')))), 'subtitles': { 'live_chat': [{ 'url': f'https://prankcast.com/api/private/chat/select-broadcast?id={post["content_id"]}&cache=', 'ext': 'json', }], } if post.get('content_id') else None, } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/premiershiprugby.py����������������������������������������������0000664�0000000�0000000�00000003454�14675634471�0022641�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import int_or_none, traverse_obj class PremiershipRugbyIE(InfoExtractor): _VALID_URL = r'https?://(?:\w+\.)premiershiprugby\.(?:com)/watch/(?P<id>[\w-]+)' _TESTS = [{ 'url': 'https://www.premiershiprugby.com/watch/full-match-harlequins-v-newcastle-falcons', 'info_dict': { 'id': '0_mbkb7ldt', 'title': 'Full Match: Harlequins v Newcastle Falcons', 'ext': 'mp4', 'thumbnail': 'https://open.http.mp.streamamg.com/p/3000914/sp/300091400/thumbnail/entry_id/0_mbkb7ldt//width/960/height/540/type/1/quality/75', 'duration': 6093.0, 'tags': ['video'], 'categories': ['Full Match', 'Harlequins', 'Newcastle Falcons', 'gallaher premiership'], }, }] def _real_extract(self, url): display_id = self._match_id(url) json_data = self._download_json( f'https://article-cms-api.incrowdsports.com/v2/articles/slug/{display_id}', display_id, query={'clientId': 'PRL'})['data']['article'] formats, subs = self._extract_m3u8_formats_and_subtitles( json_data['heroMedia']['content']['videoLink'], display_id) return { 'id': json_data['heroMedia']['content']['sourceSystemId'], 
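            # sourceSystemId appears to be the underlying StreamAMG/Kaltura entry
            # id (cf. the entry_id path segment in the test thumbnail URL above)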
yt-dlp-2024.09.27/yt_dlp/extractor/presstv.py

from .common import InfoExtractor
from ..utils import remove_start


class PressTVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?presstv\.ir/[^/]+/(?P<y>\d+)/(?P<m>\d+)/(?P<d>\d+)/(?P<id>\d+)/(?P<display_id>[^/]+)?'

    _TEST = {
        'url': 'http://www.presstv.ir/Detail/2016/04/09/459911/Australian-sewerage-treatment-facility-/',
        'md5': '5d7e3195a447cb13e9267e931d8dd5a5',
        'info_dict': {
            'id': '459911',
            'display_id': 'Australian-sewerage-treatment-facility-',
            'ext': 'mp4',
            'title': 'Organic mattresses used to clean waste water',
            'upload_date': '20160409',
            'thumbnail': r're:^https?://.*\.jpg',
            'description': 'md5:20002e654bbafb6908395a5c0cfcd125',
        },
    }

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        display_id = mobj.group('display_id') or video_id

        webpage = self._download_webpage(url, display_id)

        # extract video URL from webpage
        video_url = self._hidden_inputs(webpage)['inpPlayback']

        # build list of available formats
        # specified in http://www.presstv.ir/Scripts/playback.js
        base_url = 'http://192.99.219.222:82/presstv'
        _formats = [
            (180, '_low200.mp4'),
            (360, '_low400.mp4'),
            (720, '_low800.mp4'),
            (1080, '.mp4'),
        ]

        formats = [{
            'url': base_url + video_url[:-4] + extension,
            'format_id': f'{height}p',
            'height': height,
        } for height, extension in _formats]

        # extract video metadata
        title = remove_start(
            self._html_search_meta('title', webpage, fatal=True), 'PressTV-')

        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage)

        upload_date = '%04d%02d%02d' % (
            int(mobj.group('y')),
            int(mobj.group('m')),
            int(mobj.group('d')),
        )

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'description': description,
        }
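
An illustrative sketch, not part of the original file: the format ladder above strips the `.mp4` suffix from the hidden `inpPlayback` value (`video_url[:-4]`) and appends a per-quality suffix. With a hypothetical playback path:

    base_url = 'http://192.99.219.222:82/presstv'
    video_url = '/2016/04/459911.mp4'  # hypothetical inpPlayback value
    _formats = [(180, '_low200.mp4'), (360, '_low400.mp4'), (720, '_low800.mp4'), (1080, '.mp4')]
    formats = [{
        'url': base_url + video_url[:-4] + extension,
        'format_id': f'{height}p',
        'height': height,
    } for height, extension in _formats]
    assert formats[0]['url'] == 'http://192.99.219.222:82/presstv/2016/04/459911_low200.mp4'
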
yt-dlp-2024.09.27/yt_dlp/extractor/projectveritas.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    traverse_obj,
    unified_strdate,
)


class ProjectVeritasIE(InfoExtractor):
    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?projectveritas\.com/(?P<type>news|video)/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'https://www.projectveritas.com/news/exclusive-inside-the-new-york-and-new-jersey-hospitals-battling-coronavirus/',
        'info_dict': {
            'id': '51910aab-365a-5cf1-88f2-8eb1ca5fd3c6',
            'ext': 'mp4',
            'title': 'Exclusive: Inside The New York and New Jersey Hospitals Battling Coronavirus',
            'upload_date': '20200327',
            'thumbnail': 'md5:6076477fe50b03eb8708be9415e18e1c',
        },
    }, {
        'url': 'https://www.projectveritas.com/video/ilhan-omar-connected-ballot-harvester-in-cash-for-ballots-scheme-car-is-full/',
        'info_dict': {
            'id': 'c5aab304-a56b-54b1-9f0b-03b77bc5f2f6',
            'ext': 'mp4',
            'title': 'Ilhan Omar connected Ballot Harvester in cash-for-ballots scheme: "Car is full" of absentee ballots',
            'upload_date': '20200927',
            'thumbnail': 'md5:194b8edf0e2ba64f25500ff4378369a4',
        },
    }]

    def _real_extract(self, url):
        video_id, video_type = self._match_valid_url(url).group('id', 'type')
        api_url = f'https://www.projectveritas.com/page-data/{video_type}/{video_id}/page-data.json'
        data_json = self._download_json(api_url, video_id)['result']['data']
        main_data = traverse_obj(data_json, 'video', 'post')
        video_id = main_data['id']
        thumbnail = traverse_obj(main_data, ('image', 'ogImage', 'src'))
        mux_asset = traverse_obj(
            main_data, 'muxAsset',
            ('body', 'json', 'content', ..., 'data', 'target', 'fields', 'muxAsset'),
            get_all=False, expected_type=dict)
        if not mux_asset:
            raise ExtractorError('No video on the provided url.', expected=True)
        playback_id = traverse_obj(mux_asset, 'playbackId', ('en-US', 'playbackId'))
        formats = self._extract_m3u8_formats(f'https://stream.mux.com/{playback_id}.m3u8', video_id)
        return {
            'id': video_id,
            'title': main_data['title'],
            'upload_date': unified_strdate(main_data.get('date')),
            'thumbnail': thumbnail.replace('//', ''),
            'formats': formats,
        }
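
An aside, not part of the original file: the `muxAsset` lookup above passes two alternative paths to `traverse_obj`; `get_all=False` returns the first match and `expected_type=dict` discards anything else. A sketch with made-up data mirroring the nested shape:

    from yt_dlp.utils import traverse_obj

    main_data = {'body': {'json': {'content': [
        {'data': {'target': {'fields': {'muxAsset': {'playbackId': 'abc123'}}}}},
    ]}}}
    mux_asset = traverse_obj(
        main_data, 'muxAsset',
        ('body', 'json', 'content', ..., 'data', 'target', 'fields', 'muxAsset'),
        get_all=False, expected_type=dict)
    assert mux_asset == {'playbackId': 'abc123'}
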
yt-dlp-2024.09.27/yt_dlp/extractor/prosiebensat1.py

import hashlib
import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    determine_ext,
    float_or_none,
    int_or_none,
    join_nonempty,
    merge_dicts,
    unified_strdate,
)


class ProSiebenSat1BaseIE(InfoExtractor):
    _GEO_BYPASS = False
    _ACCESS_ID = None
    _SUPPORTED_PROTOCOLS = 'dash:clear,hls:clear,progressive:clear'
    _V4_BASE_URL = 'https://vas-v4.p7s1video.net/4.0/get'

    def _extract_video_info(self, url, clip_id):
        client_location = url

        video = self._download_json(
            'http://vas.sim-technik.de/vas/live/v2/videos',
            clip_id, 'Downloading videos JSON', query={
                'access_token': self._TOKEN,
                'client_location': client_location,
                'client_name': self._CLIENT_NAME,
                'ids': clip_id,
            })[0]

        if not self.get_param('allow_unplayable_formats') and video.get('is_protected') is True:
            self.report_drm(clip_id)

        formats = []
        if self._ACCESS_ID:
            raw_ct = self._ENCRYPTION_KEY + clip_id + self._IV + self._ACCESS_ID
            protocols = self._download_json(
                self._V4_BASE_URL + 'protocols', clip_id,
                'Downloading protocols JSON',
                headers=self.geo_verification_headers(), query={
                    'access_id': self._ACCESS_ID,
                    'client_token': hashlib.sha1((raw_ct).encode()).hexdigest(),
                    'video_id': clip_id,
                }, fatal=False, expected_status=(403,)) or {}
            error = protocols.get('error') or {}
            if error.get('title') == 'Geo check failed':
                self.raise_geo_restricted(countries=['AT', 'CH', 'DE'])
            server_token = protocols.get('server_token')
            if server_token:
                urls = (self._download_json(
                    self._V4_BASE_URL + 'urls', clip_id, 'Downloading urls JSON', query={
                        'access_id': self._ACCESS_ID,
                        'client_token': hashlib.sha1((raw_ct + server_token + self._SUPPORTED_PROTOCOLS).encode()).hexdigest(),
                        'protocols': self._SUPPORTED_PROTOCOLS,
                        'server_token': server_token,
                        'video_id': clip_id,
                    }, fatal=False) or {}).get('urls') or {}
                for protocol, variant in urls.items():
                    source_url = variant.get('clear', {}).get('url')
                    if not source_url:
                        continue
                    if protocol == 'dash':
                        formats.extend(self._extract_mpd_formats(
                            source_url, clip_id, mpd_id=protocol, fatal=False))
                    elif protocol == 'hls':
                        formats.extend(self._extract_m3u8_formats(
                            source_url, clip_id, 'mp4', 'm3u8_native',
                            m3u8_id=protocol, fatal=False))
                    else:
                        formats.append({
                            'url': source_url,
                            'format_id': protocol,
                        })
        if not formats:
            source_ids = [str(source['id']) for source in video['sources']]

            client_id = self._SALT[:2] + hashlib.sha1(''.join([clip_id, self._SALT, self._TOKEN, client_location, self._SALT, self._CLIENT_NAME]).encode()).hexdigest()

            sources = self._download_json(
                f'http://vas.sim-technik.de/vas/live/v2/videos/{clip_id}/sources',
                clip_id, 'Downloading sources JSON', query={
                    'access_token': self._TOKEN,
                    'client_id': client_id,
                    'client_location': client_location,
                    'client_name': self._CLIENT_NAME,
                })
            server_id = sources['server_id']

            def fix_bitrate(bitrate):
                bitrate = int_or_none(bitrate)
                if not bitrate:
                    return None
                return (bitrate // 1000) if bitrate % 1000 == 0 else bitrate

            for source_id in source_ids:
                client_id = self._SALT[:2] + hashlib.sha1(''.join([self._SALT, clip_id, self._TOKEN, server_id, client_location, source_id, self._SALT, self._CLIENT_NAME]).encode()).hexdigest()
                urls = self._download_json(
                    f'http://vas.sim-technik.de/vas/live/v2/videos/{clip_id}/sources/url',
                    clip_id, 'Downloading urls JSON', fatal=False, query={
                        'access_token': self._TOKEN,
                        'client_id': client_id,
                        'client_location': client_location,
                        'client_name': self._CLIENT_NAME,
                        'server_id': server_id,
                        'source_ids': source_id,
                    })
                if not urls:
                    continue
                if urls.get('status_code') != 0:
                    raise ExtractorError('This video is unavailable', expected=True)
                urls_sources = urls['sources']
                if isinstance(urls_sources, dict):
                    urls_sources = urls_sources.values()
                for source in urls_sources:
                    source_url = source.get('url')
                    if not source_url:
                        continue
                    protocol = source.get('protocol')
                    mimetype = source.get('mimetype')
                    if mimetype == 'application/f4m+xml' or 'f4mgenerator' in source_url or determine_ext(source_url) == 'f4m':
                        formats.extend(self._extract_f4m_formats(
                            source_url, clip_id, f4m_id='hds', fatal=False))
                    elif mimetype == 'application/x-mpegURL':
                        formats.extend(self._extract_m3u8_formats(
                            source_url, clip_id, 'mp4', 'm3u8_native',
                            m3u8_id='hls', fatal=False))
                    elif mimetype == 'application/dash+xml':
                        formats.extend(self._extract_mpd_formats(
                            source_url, clip_id, mpd_id='dash', fatal=False))
                    else:
                        tbr = fix_bitrate(source['bitrate'])
                        if protocol in ('rtmp', 'rtmpe'):
                            mobj = re.search(r'^(?P<url>rtmpe?://[^/]+)/(?P<path>.+)$', source_url)
                            if not mobj:
                                continue
                            path = mobj.group('path')
                            mp4colon_index = path.rfind('mp4:')
                            app = path[:mp4colon_index]
                            play_path = path[mp4colon_index:]
                            formats.append({
                                'url': '{}/{}'.format(mobj.group('url'), app),
                                'app': app,
                                'play_path': play_path,
                                'player_url': 'http://livepassdl.conviva.com/hf/ver/2.79.0.17083/LivePassModuleMain.swf',
                                'page_url': 'http://www.prosieben.de',
                                'tbr': tbr,
                                'ext': 'flv',
                                'format_id': join_nonempty('rtmp', tbr),
                            })
                        else:
                            formats.append({
                                'url': source_url,
                                'tbr': tbr,
                                'format_id': join_nonempty('http', tbr),
                            })

        return {
            'duration': float_or_none(video.get('duration')),
            'formats': formats,
        }


class ProSiebenSat1IE(ProSiebenSat1BaseIE):
    IE_NAME = 'prosiebensat1'
    IE_DESC = 'ProSiebenSat.1 Digital'
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?:
                            (?:beta\.)?
                            (?:
                                prosieben(?:maxx)?|sixx|sat1(?:gold)?|kabeleins(?:doku)?|the-voice-of-germany|advopedia
                            )\.(?:de|at|ch)|
                            ran\.de|fem\.com|advopedia\.de|galileo\.tv/video
                        )
                        /(?P<id>.+)
                    '''
    _TESTS = [
        {
            # Tests changes introduced in https://github.com/ytdl-org/youtube-dl/pull/6242
            # in response to fixing https://github.com/ytdl-org/youtube-dl/issues/6215:
            # - malformed f4m manifest support
            # - proper handling of URLs starting with `https?://` in 2.0 manifests
            # - recursive child f4m manifests extraction
            'url': 'http://www.prosieben.de/tv/circus-halligalli/videos/218-staffel-2-episode-18-jahresrueckblick-ganze-folge',
            'info_dict': {
                'id': '2104602',
                'ext': 'mp4',
                'title': 'CIRCUS HALLIGALLI - Episode 18 - Staffel 2',
                'description': 'md5:8733c81b702ea472e069bc48bb658fc1',
                'upload_date': '20131231',
                'duration': 5845.04,
                'series': 'CIRCUS HALLIGALLI',
                'season_number': 2,
                'episode': 'Episode 18 - Staffel 2',
                'episode_number': 18,
            },
        },
        {
            'url': 'http://www.prosieben.de/videokatalog/Gesellschaft/Leben/Trends/video-Lady-Umstyling-f%C3%BCr-Audrina-Rebekka-Audrina-Fergen-billig-aussehen-Battal-Modica-700544.html',
            'info_dict': {
                'id': '2570327',
                'ext': 'mp4',
                'title': 'Lady-Umstyling für Audrina',
                'description': 'md5:4c16d0c17a3461a0d43ea4084e96319d',
                'upload_date': '20131014',
                'duration': 606.76,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'Seems to be broken',
        },
        {
            'url': 'http://www.prosiebenmaxx.de/tv/experience/video/144-countdown-fuer-die-autowerkstatt-ganze-folge',
            'info_dict': {
                'id': '2429369',
                'ext': 'mp4',
                'title': 'Countdown für die Autowerkstatt',
                'description': 'md5:809fc051a457b5d8666013bc40698817',
                'upload_date': '20140223',
                'duration': 2595.04,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'This video is unavailable',
        },
        {
            'url': 'http://www.sixx.de/stars-style/video/sexy-laufen-in-ugg-boots-clip',
            'info_dict': {
                'id': '2904997',
                'ext': 'mp4',
                'title': 'Sexy laufen in Ugg Boots',
                'description': 'md5:edf42b8bd5bc4e5da4db4222c5acb7d6',
                'upload_date': '20140122',
                'duration': 245.32,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'This video is unavailable',
        },
        {
            'url': 'http://www.sat1.de/film/der-ruecktritt/video/im-interview-kai-wiesinger-clip',
            'info_dict': {
                'id': '2906572',
                'ext': 'mp4',
                'title': 'Im Interview: Kai Wiesinger',
                'description': 'md5:e4e5370652ec63b95023e914190b4eb9',
                'upload_date': '20140203',
                'duration': 522.56,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'This video is unavailable',
        },
        {
            'url': 'http://www.kabeleins.de/tv/rosins-restaurants/videos/jagd-auf-fertigkost-im-elsthal-teil-2-ganze-folge',
            'info_dict': {
                'id': '2992323',
                'ext': 'mp4',
                'title': 'Jagd auf Fertigkost im Elsthal - Teil 2',
                'description': 'md5:2669cde3febe9bce13904f701e774eb6',
                'upload_date': '20141014',
                'duration': 2410.44,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'This video is unavailable',
        },
        {
            'url': 'http://www.ran.de/fussball/bundesliga/video/schalke-toennies-moechte-raul-zurueck-ganze-folge',
            'info_dict': {
                'id': '3004256',
                'ext': 'mp4',
                'title': 'Schalke: Tönnies möchte Raul zurück',
                'description': 'md5:4b5b271d9bcde223b54390754c8ece3f',
                'upload_date': '20140226',
                'duration': 228.96,
            },
            'params': {
                # rtmp download
                'skip_download': True,
            },
            'skip': 'This video is unavailable',
        },
        {
            'url': 'http://www.the-voice-of-germany.de/video/31-andreas-kuemmert-rocket-man-clip',
            'info_dict': {
                'id': '2572814',
                'ext': 'mp4',
                'title': 'The Voice of Germany - Andreas Kümmert: Rocket Man',
                'description': 'md5:6ddb02b0781c6adf778afea606652e38',
                'timestamp': 1382041620,
                'upload_date': '20131017',
                'duration': 469.88,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.fem.com/videos/beauty-lifestyle/kurztrips-zum-valentinstag',
            'info_dict': {
                'id': '2156342',
                'ext': 'mp4',
                'title': 'Kurztrips zum Valentinstag',
                'description': 'Romantischer Kurztrip zum Valentinstag? Nina Heinemann verrät, was sich hier wirklich lohnt.',
                'duration': 307.24,
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www.prosieben.de/tv/joko-gegen-klaas/videos/playlists/episode-8-ganze-folge-playlist',
            'info_dict': {
                'id': '439664',
                'title': 'Episode 8 - Ganze Folge - Playlist',
                'description': 'md5:63b8963e71f481782aeea877658dec84',
            },
            'playlist_count': 2,
            'skip': 'This video is unavailable',
        },
        {
            # title in <h2 class="subtitle">
            'url': 'http://www.prosieben.de/stars/oscar-award/videos/jetzt-erst-enthuellt-das-geheimnis-von-emma-stones-oscar-robe-clip',
            'info_dict': {
                'id': '4895826',
                'ext': 'mp4',
                'title': 'Jetzt erst enthüllt: Das Geheimnis von Emma Stones Oscar-Robe',
                'description': 'md5:e5ace2bc43fadf7b63adc6187e9450b9',
                'upload_date': '20170302',
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'geo restricted to Germany',
        },
        {
            # geo restricted to Germany
            'url': 'http://www.kabeleinsdoku.de/tv/mayday-alarm-im-cockpit/video/102-notlandung-im-hudson-river-ganze-folge',
            'only_matching': True,
        },
        {
            # geo restricted to Germany
            'url': 'http://www.sat1gold.de/tv/edel-starck/video/11-staffel-1-episode-1-partner-wider-willen-ganze-folge',
            'only_matching': True,
        },
        {
            # geo restricted to Germany
            'url': 'https://www.galileo.tv/video/diese-emojis-werden-oft-missverstanden',
            'only_matching': True,
        },
        {
            'url': 'http://www.sat1gold.de/tv/edel-starck/playlist/die-gesamte-1-staffel',
            'only_matching': True,
        },
        {
            'url': 'http://www.advopedia.de/videos/lenssen-klaert-auf/lenssen-klaert-auf-folge-8-staffel-3-feiertage-und-freie-tage',
            'only_matching': True,
        },
    ]

    _TOKEN = 'prosieben'
    _SALT = '01!8d8F_)r9]4s[qeuXfP%'
    _CLIENT_NAME = 'kolibri-2.0.19-splec4'

    _ACCESS_ID = 'x_prosiebenmaxx-de'
    _ENCRYPTION_KEY = 'Eeyeey9oquahthainoofashoyoikosag'
    _IV = 'Aeluchoc6aevechuipiexeeboowedaok'

    _CLIPID_REGEXES = [
        r'"clip_id"\s*:\s+"(\d+)"',
        r'clipid: "(\d+)"',
        r'clip[iI]d=(\d+)',
        r'clip[iI][dD]\s*=\s*["\'](\d+)',
        r"'itemImageUrl'\s*:\s*'/dynamic/thumbnails/full/\d+/(\d+)",
        r'proMamsId"\s*:\s*"(\d+)',
        r'proMamsId"\s*:\s*"(\d+)',
    ]
    _TITLE_REGEXES = [
        r'<h2 class="subtitle" itemprop="name">\s*(.+?)</h2>',
        r'<header class="clearfix">\s*<h3>(.+?)</h3>',
        r'<!-- start video -->\s*<h1>(.+?)</h1>',
        r'<h1 class="att-name">\s*(.+?)</h1>',
        r'<header class="module_header">\s*<h2>([^<]+)</h2>\s*</header>',
        r'<h2 class="video-title" itemprop="name">\s*(.+?)</h2>',
        r'<div[^>]+id="veeseoTitle"[^>]*>(.+?)</div>',
        r'<h2[^>]+class="subtitle"[^>]*>([^<]+)</h2>',
    ]
    _DESCRIPTION_REGEXES = [
        r'<p itemprop="description">\s*(.+?)</p>',
        r'<div class="videoDecription">\s*<p><strong>Beschreibung</strong>: (.+?)</p>',
        r'<div class="g-plusone" data-size="medium"></div>\s*</div>\s*</header>\s*(.+?)\s*<footer>',
        r'<p class="att-description">\s*(.+?)\s*</p>',
        r'<p class="video-description" itemprop="description">\s*(.+?)</p>',
        r'<div[^>]+id="veeseoDescription"[^>]*>(.+?)</div>',
    ]
    _UPLOAD_DATE_REGEXES = [
        r'<span>\s*(\d{2}\.\d{2}\.\d{4} \d{2}:\d{2}) \|\s*<span itemprop="duration"',
        r'<footer>\s*(\d{2}\.\d{2}\.\d{4}) \d{2}:\d{2} Uhr',
        r'<span style="padding-left: 4px;line-height:20px; color:#404040">(\d{2}\.\d{2}\.\d{4})</span>',
        r'(\d{2}\.\d{2}\.\d{4}) \| \d{2}:\d{2} Min<br/>',
    ]
    _PAGE_TYPE_REGEXES = [
        r'<meta name="page_type" content="([^"]+)">',
        r"'itemType'\s*:\s*'([^']*)'",
    ]
    _PLAYLIST_ID_REGEXES = [
        r'content[iI]d=(\d+)',
        r"'itemId'\s*:\s*'([^']*)'",
    ]
    _PLAYLIST_CLIP_REGEXES = [
        r'(?s)data-qvt=.+?<a href="([^"]+)"',
    ]
    def _extract_clip(self, url, webpage):
        clip_id = self._html_search_regex(
            self._CLIPID_REGEXES, webpage, 'clip id')
        title = self._html_search_regex(
            self._TITLE_REGEXES, webpage, 'title',
            default=None) or self._og_search_title(webpage)
        info = self._extract_video_info(url, clip_id)
        description = self._html_search_regex(
            self._DESCRIPTION_REGEXES, webpage, 'description', default=None)
        if description is None:
            description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
        upload_date = unified_strdate(
            self._html_search_meta('og:published_time', webpage,
                                   'upload date', default=None)
            or self._html_search_regex(self._UPLOAD_DATE_REGEXES,
                                       webpage, 'upload date', default=None))

        json_ld = self._search_json_ld(webpage, clip_id, default={})

        return merge_dicts(info, {
            'id': clip_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }, json_ld)

    def _extract_playlist(self, url, webpage):
        playlist_id = self._html_search_regex(
            self._PLAYLIST_ID_REGEXES, webpage, 'playlist id')
        playlist = self._parse_json(
            self._search_regex(
                r'var\s+contentResources\s*=\s*(\[.+?\]);\s*</script',
                webpage, 'playlist'),
            playlist_id)
        entries = []
        for item in playlist:
            clip_id = item.get('id') or item.get('upc')
            if not clip_id:
                continue
            info = self._extract_video_info(url, clip_id)
            info.update({
                'id': clip_id,
                'title': item.get('title') or item.get('teaser', {}).get('headline'),
                'description': item.get('teaser', {}).get('description'),
                'thumbnail': item.get('poster'),
                'duration': float_or_none(item.get('duration')),
                'series': item.get('tvShowTitle'),
                'uploader': item.get('broadcastPublisher'),
            })
            entries.append(info)
        return self.playlist_result(entries, playlist_id)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        page_type = self._search_regex(
            self._PAGE_TYPE_REGEXES, webpage,
            'page type', default='clip').lower()
        if page_type == 'clip':
            return self._extract_clip(url, webpage)
        elif page_type == 'playlist':
            return self._extract_playlist(url, webpage)
        else:
            raise ExtractorError(
                f'Unsupported page type {page_type}', expected=True)
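
An illustrative sketch, not part of the original file: the v4 `client_token` used in `_extract_video_info` above is a plain SHA-1 hex digest over concatenated key material. The constants are the class attributes above; the clip id is taken from the first test:

    import hashlib

    encryption_key = 'Eeyeey9oquahthainoofashoyoikosag'  # _ENCRYPTION_KEY
    iv = 'Aeluchoc6aevechuipiexeeboowedaok'              # _IV
    access_id = 'x_prosiebenmaxx-de'                     # _ACCESS_ID
    clip_id = '2104602'                                  # example clip id

    raw_ct = encryption_key + clip_id + iv + access_id
    client_token = hashlib.sha1(raw_ct.encode()).hexdigest()
    # the follow-up "urls" request hashes raw_ct + server_token + protocols the same way
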
yt-dlp-2024.09.27/yt_dlp/extractor/prx.py

import itertools

from .common import InfoExtractor, SearchInfoExtractor
from ..utils import (
    clean_html,
    int_or_none,
    mimetype2ext,
    str_or_none,
    traverse_obj,
    unified_timestamp,
    url_or_none,
    urljoin,
)


class PRXBaseIE(InfoExtractor):
    PRX_BASE_URL_RE = r'https?://(?:(?:beta|listen)\.)?prx.org/%s'

    def _call_api(self, item_id, path, query=None, fatal=True, note='Downloading CMS API JSON'):
        return self._download_json(
            urljoin('https://cms.prx.org/api/v1/', path), item_id, query=query, fatal=fatal, note=note)

    @staticmethod
    def _get_prx_embed_response(response, section):
        return traverse_obj(response, ('_embedded', f'prx:{section}'))

    @staticmethod
    def _extract_file_link(response):
        return url_or_none(traverse_obj(
            response, ('_links', 'enclosure', 'href'), expected_type=str))

    @classmethod
    def _extract_image(cls, image_response):
        if not isinstance(image_response, dict):
            return
        return {
            'id': str_or_none(image_response.get('id')),
            'filesize': image_response.get('size'),
            'width': image_response.get('width'),
            'height': image_response.get('height'),
            'url': cls._extract_file_link(image_response),
        }

    @classmethod
    def _extract_base_info(cls, response):
        if not isinstance(response, dict):
            return
        item_id = str_or_none(response.get('id'))
        if not item_id:
            return
        thumbnail_dict = cls._extract_image(cls._get_prx_embed_response(response, 'image'))
        description = (
            clean_html(response.get('description'))
            or response.get('shortDescription'))
        return {
            'id': item_id,
            'title': response.get('title') or item_id,
            'thumbnails': [thumbnail_dict] if thumbnail_dict else None,
            'description': description,
            'release_timestamp': unified_timestamp(response.get('releasedAt')),
            'timestamp': unified_timestamp(response.get('createdAt')),
            'modified_timestamp': unified_timestamp(response.get('updatedAt')),
            'duration': int_or_none(response.get('duration')),
            'tags': response.get('tags'),
            'episode_number': int_or_none(response.get('episodeIdentifier')),
            'season_number': int_or_none(response.get('seasonIdentifier')),
        }

    @classmethod
    def _extract_series_info(cls, series_response):
        base_info = cls._extract_base_info(series_response)
        if not base_info:
            return
        account_info = cls._extract_account_info(
            cls._get_prx_embed_response(series_response, 'account')) or {}
        return {
            **base_info,
            'channel_id': account_info.get('channel_id'),
            'channel_url': account_info.get('channel_url'),
            'channel': account_info.get('channel'),
            'series': base_info.get('title'),
            'series_id': base_info.get('id'),
        }

    @classmethod
    def _extract_account_info(cls, account_response):
        base_info = cls._extract_base_info(account_response)
        if not base_info:
            return
        name = account_response.get('name')
        return {
            **base_info,
            'title': name,
            'channel_id': base_info.get('id'),
            'channel_url': 'https://beta.prx.org/accounts/{}'.format(base_info.get('id')),
            'channel': name,
        }

    @classmethod
    def _extract_story_info(cls, story_response):
        base_info = cls._extract_base_info(story_response)
        if not base_info:
            return
        series = cls._extract_series_info(
            cls._get_prx_embed_response(story_response, 'series')) or {}
        account = cls._extract_account_info(
            cls._get_prx_embed_response(story_response, 'account')) or {}
        return {
            **base_info,
            'series': series.get('series'),
            'series_id': series.get('series_id'),
            'channel_id': account.get('channel_id'),
            'channel_url': account.get('channel_url'),
            'channel': account.get('channel'),
        }

    def _entries(self, item_id, endpoint, entry_func, query=None):
        """
        Extract entries from paginated list API
        @param entry_func: Function to generate entry from response item
        """
        total = 0
        for page in itertools.count(1):
            response = self._call_api(f'{item_id}: page {page}', endpoint, query={
                **(query or {}),
                'page': page,
                'per': 100,
            })
            items = self._get_prx_embed_response(response, 'items')
            if not response or not items:
                break

            yield from filter(None, map(entry_func, items))

            total += response['count']
            if total >= response['total']:
                break

    def _story_playlist_entry(self, response):
        story = self._extract_story_info(response)
        if not story:
            return
        story.update({
            '_type': 'url',
            'url': 'https://beta.prx.org/stories/{}'.format(story['id']),
            'ie_key': PRXStoryIE.ie_key(),
        })
        return story

    def _series_playlist_entry(self, response):
        series = self._extract_series_info(response)
        if not series:
            return
        series.update({
            '_type': 'url',
            'url': 'https://beta.prx.org/series/{}'.format(series['id']),
            'ie_key': PRXSeriesIE.ie_key(),
        })
        return series
class PRXStoryIE(PRXBaseIE):
    _VALID_URL = PRXBaseIE.PRX_BASE_URL_RE % r'stories/(?P<id>\d+)'

    _TESTS = [
        {
            # Story with season and episode details
            'url': 'https://beta.prx.org/stories/399200',
            'info_dict': {
                'id': '399200',
                'title': 'Fly Me To The Moon',
                'description': 'md5:43230168390b95d3322048d8a56bf2bb',
                'release_timestamp': 1640250000,
                'timestamp': 1640208972,
                'modified_timestamp': 1641318202,
                'duration': 1004,
                'tags': 'count:7',
                'episode_number': 8,
                'season_number': 5,
                'series': 'AirSpace',
                'series_id': '38057',
                'channel_id': '220986',
                'channel_url': 'https://beta.prx.org/accounts/220986',
                'channel': 'Air and Space Museum',
            },
            'playlist': [{
                'info_dict': {
                    'id': '399200_part1',
                    'title': 'Fly Me To The Moon',
                    'description': 'md5:43230168390b95d3322048d8a56bf2bb',
                    'release_timestamp': 1640250000,
                    'timestamp': 1640208972,
                    'modified_timestamp': 1641318202,
                    'duration': 530,
                    'tags': 'count:7',
                    'episode_number': 8,
                    'season_number': 5,
                    'series': 'AirSpace',
                    'series_id': '38057',
                    'channel_id': '220986',
                    'channel_url': 'https://beta.prx.org/accounts/220986',
                    'channel': 'Air and Space Museum',
                    'ext': 'mp3',
                    'upload_date': '20211222',
                    'episode': 'Episode 8',
                    'release_date': '20211223',
                    'season': 'Season 5',
                    'modified_date': '20220104',
                },
            }, {
                'info_dict': {
                    'id': '399200_part2',
                    'title': 'Fly Me To The Moon',
                    'description': 'md5:43230168390b95d3322048d8a56bf2bb',
                    'release_timestamp': 1640250000,
                    'timestamp': 1640208972,
                    'modified_timestamp': 1641318202,
                    'duration': 474,
                    'tags': 'count:7',
                    'episode_number': 8,
                    'season_number': 5,
                    'series': 'AirSpace',
                    'series_id': '38057',
                    'channel_id': '220986',
                    'channel_url': 'https://beta.prx.org/accounts/220986',
                    'channel': 'Air and Space Museum',
                    'ext': 'mp3',
                    'upload_date': '20211222',
                    'episode': 'Episode 8',
                    'release_date': '20211223',
                    'season': 'Season 5',
                    'modified_date': '20220104',
                },
            },
            ],
        }, {
            # Story with only split audio
            'url': 'https://beta.prx.org/stories/326414',
            'info_dict': {
                'id': '326414',
                'title': 'Massachusetts v EPA',
                'description': 'md5:744fffba08f19f4deab69fa8d49d5816',
                'timestamp': 1592509124,
                'modified_timestamp': 1592510457,
                'duration': 3088,
                'tags': 'count:0',
                'series': 'Outside/In',
                'series_id': '36252',
                'channel_id': '206',
                'channel_url': 'https://beta.prx.org/accounts/206',
                'channel': 'New Hampshire Public Radio',
            },
            'playlist_count': 4,
        }, {
            # Story with single combined audio
            'url': 'https://beta.prx.org/stories/400404',
            'info_dict': {
                'id': '400404',
                'title': 'Cafe Chill (Episode 2022-01)',
                'thumbnails': 'count:1',
                'description': 'md5:9f1b5a3cbd64fb159d08c3baa31f1539',
                'timestamp': 1641233952,
                'modified_timestamp': 1641234248,
                'duration': 3540,
                'series': 'Café Chill',
                'series_id': '37762',
                'channel_id': '5767',
                'channel_url': 'https://beta.prx.org/accounts/5767',
                'channel': 'C89.5 - KNHC Seattle',
                'ext': 'mp3',
                'tags': 'count:0',
                'thumbnail': r're:https?://cms\.prx\.org/pub/\w+/0/web/story_image/767965/medium/Aurora_Over_Trees\.jpg',
                'upload_date': '20220103',
                'modified_date': '20220103',
            },
        }, {
            'url': 'https://listen.prx.org/stories/399200',
            'only_matching': True,
        },
    ]
    def _extract_audio_pieces(self, audio_response):
        return [{
            'format_id': str_or_none(piece_response.get('id')),
            'format_note': str_or_none(piece_response.get('label')),
            'filesize': int_or_none(piece_response.get('size')),
            'duration': int_or_none(piece_response.get('duration')),
            'ext': mimetype2ext(piece_response.get('contentType')),
            'asr': int_or_none(piece_response.get('frequency'), scale=1000),
            'abr': int_or_none(piece_response.get('bitRate')),
            'url': self._extract_file_link(piece_response),
            'vcodec': 'none',
        } for piece_response in sorted(
            self._get_prx_embed_response(audio_response, 'items') or [],
            key=lambda p: int_or_none(p.get('position')))]

    def _extract_story(self, story_response):
        info = self._extract_story_info(story_response)
        if not info:
            return
        audio_pieces = self._extract_audio_pieces(
            self._get_prx_embed_response(story_response, 'audio'))
        if len(audio_pieces) == 1:
            return {
                'formats': audio_pieces,
                **info,
            }

        entries = [{
            **info,
            'id': '{}_part{}'.format(info['id'], (idx + 1)),
            'formats': [fmt],
        } for idx, fmt in enumerate(audio_pieces)]
        return {
            '_type': 'multi_video',
            'entries': entries,
            **info,
        }

    def _real_extract(self, url):
        story_id = self._match_id(url)
        response = self._call_api(story_id, f'stories/{story_id}')
        return self._extract_story(response)


class PRXSeriesIE(PRXBaseIE):
    _VALID_URL = PRXBaseIE.PRX_BASE_URL_RE % r'series/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://beta.prx.org/series/36252',
            'info_dict': {
                'id': '36252',
                'title': 'Outside/In',
                'thumbnails': 'count:1',
                'description': 'md5:a6bedc5f810777bcb09ab30ff9059114',
                'timestamp': 1470684964,
                'modified_timestamp': 1582308830,
                'channel_id': '206',
                'channel_url': 'https://beta.prx.org/accounts/206',
                'channel': 'New Hampshire Public Radio',
                'series': 'Outside/In',
                'series_id': '36252',
            },
            'playlist_mincount': 39,
        }, {
            # Blank series
            'url': 'https://beta.prx.org/series/25038',
            'info_dict': {
                'id': '25038',
                'title': '25038',
                'timestamp': 1207612800,
                'modified_timestamp': 1207612800,
                'channel_id': '206',
                'channel_url': 'https://beta.prx.org/accounts/206',
                'channel': 'New Hampshire Public Radio',
                'series': '25038',
                'series_id': '25038',
            },
            'playlist_count': 0,
        },
    ]

    def _extract_series(self, series_response):
        info = self._extract_series_info(series_response)
        return {
            '_type': 'playlist',
            'entries': self._entries(info['id'], 'series/{}/stories'.format(info['id']),
                                     self._story_playlist_entry),
            **info,
        }

    def _real_extract(self, url):
        series_id = self._match_id(url)
        response = self._call_api(series_id, f'series/{series_id}')
        return self._extract_series(response)
class PRXAccountIE(PRXBaseIE):
    _VALID_URL = PRXBaseIE.PRX_BASE_URL_RE % r'accounts/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://beta.prx.org/accounts/206',
        'info_dict': {
            'id': '206',
            'title': 'New Hampshire Public Radio',
            'description': 'md5:277f2395301d0aca563c80c70a18ee0a',
            'channel_id': '206',
            'channel_url': 'https://beta.prx.org/accounts/206',
            'channel': 'New Hampshire Public Radio',
            'thumbnails': 'count:1',
        },
        'playlist_mincount': 380,
    }]

    def _extract_account(self, account_response):
        info = self._extract_account_info(account_response)
        series = self._entries(
            info['id'], f'accounts/{info["id"]}/series', self._series_playlist_entry)
        stories = self._entries(
            info['id'], f'accounts/{info["id"]}/stories', self._story_playlist_entry)
        return {
            '_type': 'playlist',
            'entries': itertools.chain(series, stories),
            **info,
        }

    def _real_extract(self, url):
        account_id = self._match_id(url)
        response = self._call_api(account_id, f'accounts/{account_id}')
        return self._extract_account(response)


class PRXStoriesSearchIE(PRXBaseIE, SearchInfoExtractor):
    IE_DESC = 'PRX Stories Search'
    IE_NAME = 'prxstories:search'
    _SEARCH_KEY = 'prxstories'

    def _search_results(self, query):
        yield from self._entries(
            f'query {query}', 'stories/search',
            self._story_playlist_entry, query={'q': query})


class PRXSeriesSearchIE(PRXBaseIE, SearchInfoExtractor):
    IE_DESC = 'PRX Series Search'
    IE_NAME = 'prxseries:search'
    _SEARCH_KEY = 'prxseries'

    def _search_results(self, query):
        yield from self._entries(
            f'query {query}', 'series/search',
            self._series_playlist_entry, query={'q': query})
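
A standalone sketch, not from the original file, of the pagination pattern in `_entries` above: request pages of 100 items until the running `count` total reaches the reported `total`. `fetch_page` is a stand-in for the CMS API call and is assumed to return the page's items under `items` along with `count` and `total`:

    import itertools

    def paginate(fetch_page):
        total = 0
        for page in itertools.count(1):
            response = fetch_page(page=page, per=100)
            items = response.get('items') or []
            if not items:
                break
            yield from items
            total += response['count']
            if total >= response['total']:
                break
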
yt-dlp-2024.09.27/yt_dlp/extractor/puhutv.py

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    float_or_none,
    int_or_none,
    parse_resolution,
    str_or_none,
    try_get,
    unified_timestamp,
    url_or_none,
    urljoin,
)


class PuhuTVIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-izle'
    IE_NAME = 'puhutv'
    _TESTS = [{
        # film
        'url': 'https://puhutv.com/sut-kardesler-izle',
        'md5': 'a347470371d56e1585d1b2c8dab01c96',
        'info_dict': {
            'id': '5085',
            'display_id': 'sut-kardesler',
            'ext': 'mp4',
            'title': 'Süt Kardeşler',
            'description': 'md5:ca09da25b7e57cbb5a9280d6e48d17aa',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 4832.44,
            'creator': 'Arzu Film',
            'timestamp': 1561062602,
            'upload_date': '20190620',
            'release_year': 1976,
            'view_count': int,
            'tags': list,
        },
    }, {
        # episode, geo restricted, bypassable with --geo-verification-proxy
        'url': 'https://puhutv.com/jet-sosyete-1-bolum-izle',
        'only_matching': True,
    }, {
        # 4k, with subtitles
        'url': 'https://puhutv.com/dip-1-bolum-izle',
        'only_matching': True,
    }]
    _SUBTITLE_LANGS = {
        'English': 'en',
        'Deutsch': 'de',
        'عربى': 'ar',
    }

    def _real_extract(self, url):
        display_id = self._match_id(url)

        info = self._download_json(
            urljoin(url, f'/api/slug/{display_id}-izle'),
            display_id)['data']

        video_id = str(info['id'])
        show = info.get('title') or {}
        title = info.get('name') or show['name']
        if info.get('display_name'):
            title = '{} {}'.format(title, info['display_name'])

        try:
            videos = self._download_json(
                f'https://puhutv.com/api/assets/{video_id}/videos', display_id,
                'Downloading video JSON', headers=self.geo_verification_headers())
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                self.raise_geo_restricted()
            raise

        urls = []
        formats = []

        for video in videos['data']['videos']:
            media_url = url_or_none(video.get('url'))
            if not media_url or media_url in urls:
                continue
            urls.append(media_url)

            playlist = video.get('is_playlist')
            if (video.get('stream_type') == 'hls' and playlist is True) or 'playlist.m3u8' in media_url:
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id='hls', fatal=False))
                continue

            quality = int_or_none(video.get('quality'))
            f = {
                'url': media_url,
                'ext': 'mp4',
                'height': quality,
            }
            video_format = video.get('video_format')
            is_hls = (video_format == 'hls' or '/hls/' in media_url or '/chunklist.m3u8' in media_url) and playlist is False
            if is_hls:
                format_id = 'hls'
                f['protocol'] = 'm3u8_native'
            elif video_format == 'mp4':
                format_id = 'http'
            else:
                continue
            if quality:
                format_id += f'-{quality}p'
            f['format_id'] = format_id
            formats.append(f)

        creator = try_get(
            show, lambda x: x['producer']['name'], str)

        content = info.get('content') or {}

        images = try_get(
            content, lambda x: x['images']['wide'], dict) or {}
        thumbnails = []
        for image_id, image_url in images.items():
            if not isinstance(image_url, str):
                continue
            if not image_url.startswith(('http', '//')):
                image_url = f'https://{image_url}'
            t = parse_resolution(image_id)
            t.update({
                'id': image_id,
                'url': image_url,
            })
            thumbnails.append(t)

        tags = []
        for genre in show.get('genres') or []:
            if not isinstance(genre, dict):
                continue
            genre_name = genre.get('name')
            if genre_name and isinstance(genre_name, str):
                tags.append(genre_name)

        subtitles = {}
        for subtitle in content.get('subtitles') or []:
            if not isinstance(subtitle, dict):
                continue
            lang = subtitle.get('language')
            sub_url = url_or_none(subtitle.get('url') or subtitle.get('file'))
            if not lang or not isinstance(lang, str) or not sub_url:
                continue
            subtitles[self._SUBTITLE_LANGS.get(lang, lang)] = [{
                'url': sub_url,
            }]

        return {
            'id': video_id,
            'display_id': display_id,
            'title': title,
            'description': info.get('description') or show.get('description'),
            'season_id': str_or_none(info.get('season_id')),
            'season_number': int_or_none(info.get('season_number')),
            'episode_number': int_or_none(info.get('episode_number')),
            'release_year': int_or_none(show.get('released_at')),
            'timestamp': unified_timestamp(info.get('created_at')),
            'creator': creator,
            'view_count': int_or_none(content.get('watch_count')),
            'duration': float_or_none(content.get('duration_in_ms'), 1000),
            'tags': tags,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'formats': formats,
        }


class PuhuTVSerieIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?puhutv\.com/(?P<id>[^/?#&]+)-detay'
    IE_NAME = 'puhutv:serie'
    _TESTS = [{
        'url': 'https://puhutv.com/deniz-yildizi-detay',
        'info_dict': {
            'title': 'Deniz Yıldızı',
            'id': 'deniz-yildizi',
        },
        'playlist_mincount': 205,
    }, {
        # a film detail page which is using same url with serie page
        'url': 'https://puhutv.com/kaybedenler-kulubu-detay',
        'only_matching': True,
    }]

    def _extract_entries(self, seasons):
        for season in seasons:
            season_id = season.get('id')
            if not season_id:
                continue
            page = 1
            has_more = True
            while has_more is True:
                season = self._download_json(
                    f'https://galadriel.puhutv.com/seasons/{season_id}',
                    season_id, f'Downloading page {page}', query={
                        'page': page,
                        'per': 40,
                    })
                episodes = season.get('episodes')
                if isinstance(episodes, list):
                    for ep in episodes:
                        slug_path = str_or_none(ep.get('slugPath'))
                        if not slug_path:
                            continue
                        video_id = str_or_none(int_or_none(ep.get('id')))
                        yield self.url_result(
                            f'https://puhutv.com/{slug_path}',
                            ie=PuhuTVIE.ie_key(), video_id=video_id,
                            video_title=ep.get('name') or ep.get('eventLabel'))
                page += 1
                has_more = season.get('hasMore')

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        info = self._download_json(
            urljoin(url, f'/api/slug/{playlist_id}-detay'),
            playlist_id)['data']

        seasons = info.get('seasons')
        if seasons:
            return self.playlist_result(
                self._extract_entries(seasons), playlist_id, info.get('name'))

        # For films, these are using same url with series
        video_id = info.get('slug') or info['assets'][0]['slug']
        return self.url_result(
            f'https://puhutv.com/{video_id}-izle', PuhuTVIE.ie_key(), video_id)
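
An aside, not part of the original file: the thumbnail loop above relies on `parse_resolution` reading the dimensions straight out of the image id. A made-up example:

    from yt_dlp.utils import parse_resolution

    t = parse_resolution('1920x1080')  # hypothetical image_id
    t.update({'id': '1920x1080', 'url': 'https://example.com/wide.jpg'})
    assert t['width'] == 1920 and t['height'] == 1080
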
yt-dlp-2024.09.27/yt_dlp/extractor/puls4.py

from .prosiebensat1 import ProSiebenSat1BaseIE
from ..utils import parse_duration, unified_strdate


class Puls4IE(ProSiebenSat1BaseIE):
    _VALID_URL = r'https?://(?:www\.)?puls4\.com/(?P<id>[^?#&]+)'
    _TESTS = [{
        'url': 'http://www.puls4.com/2-minuten-2-millionen/staffel-3/videos/2min2miotalk/Tobias-Homberger-von-myclubs-im-2min2miotalk-118118',
        'md5': 'fd3c6b0903ac72c9d004f04bc6bb3e03',
        'info_dict': {
            'id': '118118',
            'ext': 'flv',
            'title': 'Tobias Homberger von myclubs im #2min2miotalk',
            'description': 'md5:f9def7c5e8745d6026d8885487d91955',
            'upload_date': '20160830',
            'uploader': 'PULS_4',
        },
    }, {
        'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident.-Norbert-Hofer',
        'only_matching': True,
    }, {
        'url': 'http://www.puls4.com/pro-und-contra/wer-wird-prasident/Ganze-Folgen/Wer-wird-Praesident-Analyse-des-Interviews-mit-Norbert-Hofer-416598',
        'only_matching': True,
    }]
    _TOKEN = 'puls4'
    _SALT = '01!kaNgaiNgah1Ie4AeSha'
    _CLIENT_NAME = ''

    def _real_extract(self, url):
        path = self._match_id(url)
        content_path = self._download_json(
            'http://www.puls4.com/api/json-fe/page/' + path, path)['content'][0]['url']
        media = self._download_json(
            'http://www.puls4.com' + content_path, content_path)['mediaCurrent']
        player_content = media['playerContent']
        info = self._extract_video_info(url, player_content['id'])
        info.update({
            'id': str(media['objectId']),
            'title': player_content['title'],
            'description': media.get('description'),
            'thumbnail': media.get('previewLink'),
            'upload_date': unified_strdate(media.get('date')),
            'duration': parse_duration(player_content.get('duration')),
            'episode': player_content.get('episodePartName'),
            'show': media.get('channel'),
            'season_id': player_content.get('seasonId'),
            'uploader': player_content.get('sourceCompany'),
        })
        return info
yt-dlp-2024.09.27/yt_dlp/extractor/pyvideo.py

import re

from .common import InfoExtractor
from ..utils import int_or_none


class PyvideoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?pyvideo\.org/(?P<category>[^/]+)/(?P<id>[^/?#&.]+)'
    _TESTS = [{
        'url': 'http://pyvideo.org/pycon-us-2013/become-a-logging-expert-in-30-minutes.html',
        'info_dict': {
            'id': 'become-a-logging-expert-in-30-minutes',
        },
        'playlist_count': 2,
    }, {
        'url': 'http://pyvideo.org/pygotham-2012/gloriajw-spotifywitherikbernhardsson182m4v.html',
        'md5': '5fe1c7e0a8aa5570330784c847ff6d12',
        'info_dict': {
            'id': '2542',
            'ext': 'm4v',
            'title': 'Gloriajw-SpotifyWithErikBernhardsson182.m4v',
        },
    }]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        category = mobj.group('category')
        video_id = mobj.group('id')

        entries = []

        data = self._download_json(
            f'https://raw.githubusercontent.com/pyvideo/data/master/{category}/videos/{video_id}.json',
            video_id, fatal=False)

        if data:
            for video in data['videos']:
                video_url = video.get('url')
                if video_url:
                    if video.get('type') == 'youtube':
                        entries.append(self.url_result(video_url, 'Youtube'))
                    else:
                        entries.append({
                            'id': str(data.get('id') or video_id),
                            'url': video_url,
                            'title': data['title'],
                            'description': data.get('description') or data.get('summary'),
                            'thumbnail': data.get('thumbnail_url'),
                            'duration': int_or_none(data.get('duration')),
                        })
        else:
            webpage = self._download_webpage(url, video_id)
            title = self._og_search_title(webpage)
            media_urls = self._search_regex(
                r'(?s)Media URL:(.+?)</li>', webpage, 'media urls')
            for m in re.finditer(
                    r'<a[^>]+href=(["\'])(?P<url>http.+?)\1', media_urls):
                media_url = m.group('url')
                if re.match(r'https?://www\.youtube\.com/watch\?v=.*', media_url):
                    entries.append(self.url_result(media_url, 'Youtube'))
                else:
                    entries.append({
                        'id': video_id,
                        'url': media_url,
                        'title': title,
                    })

        return self.playlist_result(entries, video_id)
yt-dlp-2024.09.27/yt_dlp/extractor/qdance.py

import json
import time

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    jwt_decode_hs256,
    str_or_none,
    traverse_obj,
    try_call,
    url_or_none,
)


class QDanceIE(InfoExtractor):
    _NETRC_MACHINE = 'qdance'
    _VALID_URL = r'https?://(?:www\.)?q-dance\.com/network/(?:library|live)/(?P<id>[\w-]+)'
    _TESTS = [{
        'note': 'vod',
        'url': 'https://www.q-dance.com/network/library/146542138',
        'info_dict': {
            'id': '146542138',
            'ext': 'mp4',
            'title': 'Sound Rush [LIVE] | Defqon.1 Weekend Festival 2022 | Friday | RED',
            'display_id': 'sound-rush-live-v3-defqon-1-weekend-festival-2022-friday-red',
            'description': 'Relive Defqon.1 - Primal Energy 2022 with the sounds of Sound Rush LIVE at the RED on Friday! 🔥',
            'season': 'Defqon.1 Weekend Festival 2022',
            'season_id': '31840632',
            'series': 'Defqon.1',
            'series_id': '31840378',
            'thumbnail': 'https://images.q-dance.network/1674829540-20220624171509-220624171509_delio_dn201093-2.jpg',
            'availability': 'premium_only',
            'duration': 1829,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'note': 'livestream',
        'url': 'https://www.q-dance.com/network/live/149170353',
        'info_dict': {
            'id': '149170353',
            'ext': 'mp4',
            'title': r're:^Defqon\.1 2023 - Friday - RED',
            'display_id': 'defqon-1-2023-friday-red',
            'description': 'md5:3c73fbbd4044e578e696adfc64019163',
            'season': 'Defqon.1 Weekend Festival 2023',
            'season_id': '141735599',
            'series': 'Defqon.1',
            'series_id': '31840378',
            'thumbnail': 'https://images.q-dance.network/1686849069-area-thumbs_red.png',
            'availability': 'subscriber_only',
            'live_status': 'is_live',
            'channel_id': 'qdancenetwork.video_149170353',
        },
        'skip': 'Completed livestream',
    }, {
        'note': 'vod with alphanumeric id',
        'url': 'https://www.q-dance.com/network/library/WhDleSIWSfeT3Q9ObBKBeA',
        'info_dict': {
            'id': 'WhDleSIWSfeT3Q9ObBKBeA',
            'ext': 'mp4',
            'title': 'Aftershock I Defqon.1 Weekend Festival 2023 I Sunday I BLUE',
            'display_id': 'naam-i-defqon-1-weekend-festival-2023-i-dag-i-podium',
            'description': 'Relive Defqon.1 Path of the Warrior with Aftershock at the BLUE 🔥',
            'series': 'Defqon.1',
            'series_id': '31840378',
            'season': 'Defqon.1 Weekend Festival 2023',
            'season_id': '141735599',
            'duration': 3507,
            'availability': 'premium_only',
            'thumbnail': 'https://images.q-dance.network/1698158361-230625-135716-defqon-1-aftershock.jpg',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.q-dance.com/network/library/-uRFKXwmRZGVnve7av9uqA',
        'only_matching': True,
    }]

    _access_token = None
    _refresh_token = None
    def _call_login_api(self, data, note='Logging in'):
        login = self._download_json(
            'https://members.id-t.com/api/auth/login', None, note, headers={
                'content-type': 'application/json',
                'brand': 'qdance',
                'origin': 'https://www.q-dance.com',
                'referer': 'https://www.q-dance.com/',
            }, data=json.dumps(data, separators=(',', ':')).encode(),
            expected_status=lambda x: True)

        tokens = traverse_obj(login, ('data', {
            '_id-t-accounts-token': ('accessToken', {str}),
            '_id-t-accounts-refresh': ('refreshToken', {str}),
            '_id-t-accounts-id-token': ('idToken', {str}),
        }))

        if not tokens.get('_id-t-accounts-token'):
            error = ': '.join(traverse_obj(login, ('error', ('code', 'message'), {str})))
            if 'validation_error' not in error:
                raise ExtractorError(f'Q-Dance API said "{error}"')
            msg = 'Invalid username or password' if 'email' in data else 'Refresh token has expired'
            raise ExtractorError(msg, expected=True)

        for name, value in tokens.items():
            self._set_cookie('.q-dance.com', name, value)

    def _perform_login(self, username, password):
        self._call_login_api({'email': username, 'password': password})

    def _real_initialize(self):
        cookies = self._get_cookies('https://www.q-dance.com/')
        self._refresh_token = try_call(lambda: cookies['_id-t-accounts-refresh'].value)
        self._access_token = try_call(lambda: cookies['_id-t-accounts-token'].value)
        if not self._access_token:
            self.raise_login_required()

    def _get_auth(self):
        if (try_call(lambda: jwt_decode_hs256(self._access_token)['exp']) or 0) <= int(time.time() - 120):
            if not self._refresh_token:
                raise ExtractorError(
                    'Cannot refresh access token, login with yt-dlp or refresh cookies in browser')
            self._call_login_api({'refreshToken': self._refresh_token}, note='Refreshing access token')
            self._real_initialize()

        return {'Authorization': self._access_token}

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        data = self._search_nuxt_data(webpage, video_id, traverse=('data', 0, 'data'))

        def extract_availability(level):
            level = int_or_none(level) or 0
            return self._availability(
                needs_premium=(level >= 20), needs_subscription=(level >= 15), needs_auth=True)

        info = traverse_obj(data, {
            'title': ('title', {str.strip}),
            'description': ('description', {str.strip}),
            'display_id': ('slug', {str}),
            'thumbnail': ('thumbnail', {url_or_none}),
            'duration': ('durationInSeconds', {int_or_none}, {lambda x: x or None}),
            'availability': ('subscription', 'level', {extract_availability}),
            'is_live': ('type', {lambda x: x.lower() == 'live'}),
            'artist': ('acts', ..., {str}),
            'series': ('event', 'title', {str.strip}),
            'series_id': ('event', 'id', {str_or_none}),
            'season': ('eventEdition', 'title', {str.strip}),
            'season_id': ('eventEdition', 'id', {str_or_none}),
            'channel_id': ('pubnub', 'channelName', {str}),
        })

        stream = self._download_json(
            f'https://dc9h6qmsoymbq.cloudfront.net/api/content/videos/{video_id}/url',
            video_id, headers=self._get_auth(), expected_status=401)

        m3u8_url = traverse_obj(stream, ('data', 'url', {url_or_none}))
        if not m3u8_url and traverse_obj(stream, ('error', 'code')) == 'unauthorized':
            raise ExtractorError('Your account does not have access to this content', expected=True)

        formats = self._extract_m3u8_formats(
            m3u8_url, video_id, fatal=False, live=True) if m3u8_url else []
        if not formats:
            self.raise_no_formats('No active streams found', expected=bool(info.get('is_live')))

        return {
            **info,
            'id': video_id,
            'formats': formats,
        }
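
An illustrative sketch, not part of the original file: the refresh check in `_get_auth` above decodes the JWT payload with `jwt_decode_hs256` (which does not verify the signature) and compares its `exp` claim against the clock, with the same 120-second margin used in the method:

    import time
    from yt_dlp.utils import jwt_decode_hs256, try_call

    def needs_refresh(access_token):
        exp = try_call(lambda: jwt_decode_hs256(access_token)['exp']) or 0
        return exp <= int(time.time() - 120)
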
yt-dlp-2024.09.27/yt_dlp/extractor/qingting.py

from .common import InfoExtractor
from ..utils import traverse_obj


class QingTingIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.|m\.)?(?:qingting\.fm|qtfm\.cn)/v?channels/(?P<channel>\d+)/programs/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.qingting.fm/channels/378005/programs/22257411/',
        'md5': '47e6a94f4e621ed832c316fd1888fb3c',
        'info_dict': {
            'id': '22257411',
            'title': '用了十年才修改,谁在乎教科书?',
            'channel_id': '378005',
            'channel': '睡前消息',
            'uploader': '马督工',
            'ext': 'm4a',
        },
    }, {
        'url': 'https://m.qtfm.cn/vchannels/378005/programs/23023573/',
        'md5': '2703120b6abe63b5fa90b975a58f4c0e',
        'info_dict': {
            'id': '23023573',
            'title': '【睡前消息488】重庆山火之后,有图≠真相',
            'channel_id': '378005',
            'channel': '睡前消息',
            'uploader': '马督工',
            'ext': 'm4a',
        },
    }]

    def _real_extract(self, url):
        channel_id, pid = self._match_valid_url(url).group('channel', 'id')
        webpage = self._download_webpage(
            f'https://m.qtfm.cn/vchannels/{channel_id}/programs/{pid}/', pid)
        info = self._search_json(r'window\.__initStores\s*=', webpage, 'program info', pid)
        return {
            'id': pid,
            'title': traverse_obj(info, ('ProgramStore', 'programInfo', 'title')),
            'channel_id': channel_id,
            'channel': traverse_obj(info, ('ProgramStore', 'channelInfo', 'title')),
            'uploader': traverse_obj(info, ('ProgramStore', 'podcasterInfo', 'podcaster', 'nickname')),
            'url': traverse_obj(info, ('ProgramStore', 'programInfo', 'audioUrl')),
            'vcodec': 'none',
            'acodec': 'm4a',
            'ext': 'm4a',
        }
yt-dlp-2024.09.27/yt_dlp/extractor/qqmusic.py

import base64
import functools
import json
import random
import time

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    OnDemandPagedList,
    clean_html,
    int_or_none,
    join_nonempty,
    js_to_json,
    str_or_none,
    strip_jsonp,
    traverse_obj,
    unescapeHTML,
    url_or_none,
    urljoin,
)


class QQMusicBaseIE(InfoExtractor):
    def _get_cookie(self, key, default=None):
        return getattr(self._get_cookies('https://y.qq.com').get(key), 'value', default)

    def _get_g_tk(self):
        n = 5381
        for c in self._get_cookie('qqmusic_key', ''):
            n += (n << 5) + ord(c)
        return n & 2147483647

    def _get_uin(self):
        return int_or_none(self._get_cookie('uin')) or 0

    @property
    def is_logged_in(self):
        return bool(self._get_uin() and self._get_cookie('fqm_pvqid'))

    # Reference: m_r_GetRUin() in top_player.js
    # http://imgcache.gtimg.cn/music/portal_v3/y/top_player.js
    @staticmethod
    def _m_r_get_ruin():
        cur_ms = int(time.time() * 1000) % 1000
        return int(round(random.random() * 2147483647) * cur_ms % 1E10)

    def _download_init_data(self, url, mid, fatal=True):
        webpage = self._download_webpage(url, mid, fatal=fatal)
        return self._search_json(r'window\.__INITIAL_DATA__\s*=', webpage,
                                 'init data', mid, transform_source=js_to_json, fatal=fatal)

    def _make_fcu_req(self, req_dict, mid, headers={}, **kwargs):
        return self._download_json(
            'https://u.y.qq.com/cgi-bin/musicu.fcg', mid, data=json.dumps({
                'comm': {
                    'cv': 0,
                    'ct': 24,
                    'format': 'json',
                    'uin': self._get_uin(),
                },
                **req_dict,
            }, separators=(',', ':')).encode(), headers=headers, **kwargs)
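
# --- illustrative aside, not part of the original file ---
# The g_tk value computed by _get_g_tk above is a DJB2-style rolling hash
# over the `qqmusic_key` cookie, masked to a non-negative 31-bit integer:
def _g_tk_sketch(qqmusic_key):
    n = 5381
    for c in qqmusic_key:
        n += (n << 5) + ord(c)
    return n & 2147483647
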
mid, note='Downloading song info')['info']['data']['track_info'] media_mid = info_data['file']['media_mid'] data = self._make_fcu_req({ 'req_1': { 'module': 'vkey.GetVkeyServer', 'method': 'CgiGetVkey', 'param': { 'guid': str(self._m_r_get_ruin()), 'songmid': [mid] * len(self._FORMATS), 'songtype': [0] * len(self._FORMATS), 'uin': str(self._get_uin()), 'loginflag': 1, 'platform': '20', 'filename': [f'{f["prefix"]}{media_mid}.{f["ext"]}' for f in self._FORMATS.values()], }, }, 'req_2': { 'module': 'music.musichallSong.PlayLyricInfo', 'method': 'GetPlayLyricInfo', 'param': {'songMID': mid}, }, }, mid, note='Downloading formats and lyric', headers=self.geo_verification_headers()) code = traverse_obj(data, ('req_1', 'code', {int})) if code != 0: raise ExtractorError(f'Failed to download format info, error code {code or "unknown"}') formats = [] for media_info in traverse_obj(data, ( 'req_1', 'data', 'midurlinfo', lambda _, v: v['songmid'] == mid and v['purl']), ): format_key = traverse_obj(media_info, ('filename', {str}, {lambda x: x[:4]})) format_info = self._FORMATS.get(format_key) or {} format_id = format_info.get('name') formats.append({ 'url': urljoin('https://dl.stream.qqmusic.qq.com', media_info['purl']), 'format': format_id, 'format_id': format_id, 'size': traverse_obj(info_data, ('file', f'size_{format_id}', {int_or_none})), 'quality': format_info.get('preference'), 'abr': format_info.get('abr'), 'ext': format_info.get('ext'), 'vcodec': 'none', }) if not formats and not self.is_logged_in: self.raise_login_required() if traverse_obj(data, ('req_2', 'code')): self.report_warning(f'Failed to download lyric, error {data["req_2"]["code"]!r}') lrc_content = traverse_obj(data, ('req_2', 'data', 'lyric', {lambda x: base64.b64decode(x).decode('utf-8')})) info_dict = { 'id': mid, 'formats': formats, **traverse_obj(info_data, { 'title': ('title', {str}), 'album': ('album', 'title', {str}, {lambda x: x or None}), 'release_date': ('time_public', {lambda x: x.replace('-', '') or None}), 'creators': ('singer', ..., 'name', {str}), 'alt_title': ('subtitle', {str}, {lambda x: x or None}), 'duration': ('interval', {int_or_none}), }), **traverse_obj(init_data, ('detail', { 'thumbnail': ('picurl', {url_or_none}), 'description': ('info', 'intro', 'content', ..., 'value', {str}), 'genres': ('info', 'genre', 'content', ..., 'value', {str}, all), }), get_all=False), } if lrc_content: info_dict['subtitles'] = {'origin': [{'ext': 'lrc', 'data': lrc_content}]} info_dict['description'] = join_nonempty(info_dict.get('description'), lrc_content, delim='\n') return info_dict class QQMusicSingerIE(QQMusicBaseIE): IE_NAME = 'qqmusic:singer' IE_DESC = 'QQ音乐 - 歌手' _VALID_URL = r'https?://y\.qq\.com/n/ryqq/singer/(?P<id>[0-9A-Za-z]+)' _TESTS = [{ 'url': 'https://y.qq.com/n/ryqq/singer/001BLpXF2DyJe2', 'info_dict': { 'id': '001BLpXF2DyJe2', 'title': '林俊杰', 'description': 'md5:10624ce73b06fa400bc846f59b0305fa', 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])', }, 'playlist_mincount': 100, }, { 'url': 'https://y.qq.com/n/ryqq/singer/000Q00f213YzNV', 'info_dict': { 'id': '000Q00f213YzNV', 'title': '桃几OvO', 'description': '小破站小唱见~希望大家喜欢听我唱歌~!', 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])', }, 'playlist_count': 12, 'playlist': [{ 'info_dict': { 'id': '0016cvsy02mmCl', 'ext': 'mp3', 'title': '群青', 'album': '桃几2021年翻唱集', 'release_date': '20210913', 'duration': 248, 'creators': ['桃几OvO'], 'genres': ['Pop'], 'description': 'md5:4296005a04edcb5cdbe0889d5055a7ae', 'size': 3970822, 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])', }, 
}], }] _PAGE_SIZE = 50 def _fetch_page(self, mid, page_size, page_num): data = self._make_fcu_req({'req_1': { 'module': 'music.web_singer_info_svr', 'method': 'get_singer_detail_info', 'param': { 'sort': 5, 'singermid': mid, 'sin': page_num * page_size, 'num': page_size, }}}, mid, note=f'Downloading page {page_num}') yield from traverse_obj(data, ('req_1', 'data', 'songlist', ..., {lambda x: self.url_result( f'https://y.qq.com/n/ryqq/songDetail/{x["mid"]}', QQMusicIE, x['mid'], x.get('title'))})) def _real_extract(self, url): mid = self._match_id(url) init_data = self._download_init_data(url, mid, fatal=False) return self.playlist_result( OnDemandPagedList(functools.partial(self._fetch_page, mid, self._PAGE_SIZE), self._PAGE_SIZE), mid, **traverse_obj(init_data, ('singerDetail', { 'title': ('basic_info', 'name', {str}), 'description': ('ex_info', 'desc', {str}), 'thumbnail': ('pic', 'pic', {url_or_none}), }))) class QQPlaylistBaseIE(InfoExtractor): def _extract_entries(self, info_json, path): for song in traverse_obj(info_json, path): song_mid = song.get('songmid') if not song_mid: continue yield self.url_result( f'https://y.qq.com/n/ryqq/songDetail/{song_mid}', QQMusicIE, song_mid, song.get('songname')) class QQMusicAlbumIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:album' IE_DESC = 'QQ音乐 - 专辑' _VALID_URL = r'https?://y\.qq\.com/n/ryqq/albumDetail/(?P<id>[0-9A-Za-z]+)' _TESTS = [{ 'url': 'https://y.qq.com/n/ryqq/albumDetail/000gXCTb2AhRR1', 'info_dict': { 'id': '000gXCTb2AhRR1', 'title': '我们都是这样长大的', 'description': 'md5:179c5dce203a5931970d306aa9607ea6', }, 'playlist_count': 4, }, { 'url': 'https://y.qq.com/n/ryqq/albumDetail/002Y5a3b3AlCu3', 'info_dict': { 'id': '002Y5a3b3AlCu3', 'title': '그리고…', 'description': 'md5:a48823755615508a95080e81b51ba729', }, 'playlist_count': 8, }] def _real_extract(self, url): mid = self._match_id(url) album_json = self._download_json( 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_album_info_cp.fcg', mid, 'Download album page', query={'albummid': mid, 'format': 'json'})['data'] entries = self._extract_entries(album_json, ('list', ...)) return self.playlist_result(entries, mid, **traverse_obj(album_json, { 'title': ('name', {str}), 'description': ('desc', {str.strip}), })) class QQMusicToplistIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:toplist' IE_DESC = 'QQ音乐 - 排行榜' _VALID_URL = r'https?://y\.qq\.com/n/ryqq/toplist/(?P<id>[0-9]+)' _TESTS = [{ 'url': 'https://y.qq.com/n/ryqq/toplist/123', 'info_dict': { 'id': '123', 'title': r're:美国热门音乐榜 \d{4}-\d{2}-\d{2}', 'description': '美国热门音乐榜,每周一更新。', }, 'playlist_count': 95, }, { 'url': 'https://y.qq.com/n/ryqq/toplist/3', 'info_dict': { 'id': '3', 'title': r're:巅峰榜·欧美 \d{4}-\d{2}-\d{2}', 'description': 'md5:4def03b60d3644be4c9a36f21fd33857', }, 'playlist_count': 100, }, { 'url': 'https://y.qq.com/n/ryqq/toplist/106', 'info_dict': { 'id': '106', 'title': r're:韩国Mnet榜 \d{4}-\d{2}-\d{2}', 'description': 'md5:cb84b325215e1d21708c615cac82a6e7', }, 'playlist_count': 50, }] def _real_extract(self, url): list_id = self._match_id(url) toplist_json = self._download_json( 'http://i.y.qq.com/v8/fcg-bin/fcg_v8_toplist_cp.fcg', list_id, note='Download toplist page', query={'type': 'toplist', 'topid': list_id, 'format': 'json'}) return self.playlist_result( self._extract_entries(toplist_json, ('songlist', ..., 'data')), list_id, playlist_title=join_nonempty(*traverse_obj( toplist_json, ((('topinfo', 'ListName'), 'update_time'), None)), delim=' '), playlist_description=traverse_obj(toplist_json, ('topinfo', 'info'))) class 
QQMusicPlaylistIE(QQPlaylistBaseIE): IE_NAME = 'qqmusic:playlist' IE_DESC = 'QQ音乐 - 歌单' _VALID_URL = r'https?://y\.qq\.com/n/ryqq/playlist/(?P<id>[0-9]+)' _TESTS = [{ 'url': 'https://y.qq.com/n/ryqq/playlist/1374105607', 'info_dict': { 'id': '1374105607', 'title': '易入人心的华语民谣', 'description': '民谣的歌曲易于传唱、、歌词朗朗伤口、旋律简单温馨。属于那种才入耳孔。却上心头的感觉。没有太多的复杂情绪。简单而直接地表达乐者的情绪,就是这样的简单才易入人心。', }, 'playlist_count': 20, }] def _real_extract(self, url): list_id = self._match_id(url) list_json = self._download_json( 'http://i.y.qq.com/qzone-music/fcg-bin/fcg_ucc_getcdinfo_byids_cp.fcg', list_id, 'Download list page', query={'type': 1, 'json': 1, 'utf8': 1, 'onlysong': 0, 'disstid': list_id}, transform_source=strip_jsonp, headers={'Referer': url}) if not len(list_json.get('cdlist', [])): raise ExtractorError(join_nonempty( 'Unable to get playlist info', join_nonempty('code', 'subcode', from_dict=list_json), list_json.get('msg'), delim=': ')) entries = self._extract_entries(list_json, ('cdlist', 0, 'songlist', ...)) return self.playlist_result(entries, list_id, **traverse_obj(list_json, ('cdlist', 0, { 'title': ('dissname', {str}), 'description': ('desc', {unescapeHTML}, {clean_html}), }))) class QQMusicVideoIE(QQMusicBaseIE): IE_NAME = 'qqmusic:mv' IE_DESC = 'QQ音乐 - MV' _VALID_URL = r'https?://y\.qq\.com/n/ryqq/mv/(?P<id>[0-9A-Za-z]+)' _TESTS = [{ 'url': 'https://y.qq.com/n/ryqq/mv/002Vsarh3SVU8K', 'info_dict': { 'id': '002Vsarh3SVU8K', 'ext': 'mp4', 'title': 'The Chant (Extended Mix / Audio)', 'description': '', 'thumbnail': r're:^https?://.*\.jpg(?:$|[#?])', 'release_timestamp': 1688918400, 'release_date': '20230709', 'duration': 313, 'creators': ['Duke Dumont'], 'view_count': int, }, }] def _parse_url_formats(self, url_data): return traverse_obj(url_data, ('mp4', lambda _, v: v['freeflow_url'], { 'url': ('freeflow_url', 0, {url_or_none}), 'filesize': ('fileSize', {int_or_none}), 'format_id': ('newFileType', {str_or_none}), })) def _real_extract(self, url): video_id = self._match_id(url) video_info = self._make_fcu_req({ 'mvInfo': { 'module': 'music.video.VideoData', 'method': 'get_video_info_batch', 'param': { 'vidlist': [video_id], 'required': [ 'vid', 'type', 'sid', 'cover_pic', 'duration', 'singers', 'video_pay', 'hint', 'code', 'msg', 'name', 'desc', 'playcnt', 'pubdate', 'play_forbid_reason'], }, }, 'mvUrl': { 'module': 'music.stream.MvUrlProxy', 'method': 'GetMvUrls', 'param': {'vids': [video_id]}, }, }, video_id, headers=self.geo_verification_headers()) if traverse_obj(video_info, ('mvInfo', 'data', video_id, 'play_forbid_reason')) == 3: self.raise_geo_restricted() return { 'id': video_id, 'formats': self._parse_url_formats(traverse_obj(video_info, ('mvUrl', 'data', video_id))), **traverse_obj(video_info, ('mvInfo', 'data', video_id, { 'title': ('name', {str}), 'description': ('desc', {str}), 'thumbnail': ('cover_pic', {url_or_none}), 'release_timestamp': ('pubdate', {int_or_none}), 'duration': ('duration', {int_or_none}), 'creators': ('singers', ..., 'name', {str}), 'view_count': ('playcnt', {int_or_none}), })), } 
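# Side note on the credential helpers above: _get_g_tk() is the DJB2-style
# "g_tk" checksum that QQ derives from the qqmusic_key cookie. A minimal
# standalone sketch of the same computation (the name g_tk below is
# illustrative only, not part of this module):
#
#   def g_tk(qqmusic_key: str) -> int:
#       n = 5381  # DJB2 seed
#       for c in qqmusic_key:
#           n += (n << 5) + ord(c)  # i.e. n = 33 * n + ord(c)
#       return n & 2147483647  # mask to a positive 31-bit value
#
#   g_tk('')  # -> 5381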
���������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/r7.py������������������������������������������������������������0000664�0000000�0000000�00000011035�14675634471�0017563�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import int_or_none class R7IE(InfoExtractor): _WORKING = False _ENABLED = None # XXX: pass through to GenericIE _VALID_URL = r'''(?x) https?:// (?: (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/| noticias\.r7\.com(?:/[^/]+)+/[^/]+-| player\.r7\.com/video/i/ ) (?P<id>[\da-f]{24}) ''' _TESTS = [{ 'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html', 'md5': '403c4e393617e8e8ddc748978ee8efde', 'info_dict': { 'id': '54e7050b0cf2ff57e0279389', 'ext': 'mp4', 'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"', 'description': 'md5:01812008664be76a6479aa58ec865b72', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 98, 'like_count': int, 'view_count': int, }, }, { 'url': 'http://esportes.r7.com/videos/cigano-manda-recado-aos-fas/idmedia/4e176727b51a048ee6646a1b.html', 'only_matching': True, }, { 'url': 'http://noticias.r7.com/record-news/video/representante-do-instituto-sou-da-paz-fala-sobre-fim-do-estatuto-do-desarmamento-5480fc580cf2285b117f438d/', 'only_matching': True, }, { 'url': 'http://player.r7.com/video/i/54e7050b0cf2ff57e0279389?play=true&video=http://vsh.r7.com/54e7050b0cf2ff57e0279389/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-ATOS_copy.mp4&linkCallback=http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html&thumbnail=http://vtb.r7.com/ER7_RE_BG_MORTE_JOVENS_570kbps_2015-02-2009f17818-cc82-4c8f-86dc-89a66934e633-thumb.jpg&idCategory=192&share=true&layout=full&full=true', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) video = self._download_json( f'http://player-api.r7.com/video/i/{video_id}', video_id) title = video['title'] formats = [] media_url_hls = video.get('media_url_hls') if media_url_hls: formats.extend(self._extract_m3u8_formats( media_url_hls, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) media_url = video.get('media_url') if media_url: f = { 'url': media_url, 'format_id': 'http', } # m3u8 format always matches the http format, let's copy metadata from # one to another m3u8_formats = list(filter( lambda f: f.get('vcodec') != 'none', formats)) if len(m3u8_formats) == 1: f_copy = m3u8_formats[0].copy() f_copy.update(f) f_copy['protocol'] = 'http' f = f_copy formats.append(f) description = video.get('description') thumbnail = video.get('thumb') duration = int_or_none(video.get('media_duration')) like_count = int_or_none(video.get('likes')) view_count = int_or_none(video.get('views')) return { 'id': video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'duration': duration, 'like_count': like_count, 'view_count': view_count, 'formats': formats, } class R7ArticleIE(InfoExtractor): _WORKING = False _ENABLED = None # XXX: pass through to GenericIE _VALID_URL = 
r'https?://(?:[a-zA-Z]+)\.r7\.com/(?:[^/]+/)+[^/?#&]+-(?P<id>\d+)' _TEST = { 'url': 'http://tv.r7.com/record-play/balanco-geral/videos/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-16102015', 'only_matching': True, } @classmethod def suitable(cls, url): return False if R7IE.suitable(url) else super().suitable(url) def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) video_id = self._search_regex( r'<div[^>]+(?:id=["\']player-|class=["\']embed["\'][^>]+id=["\'])([\da-f]{24})', webpage, 'video id') return self.url_result(f'http://player.r7.com/video/i/{video_id}', R7IE.ie_key()) ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/radiko.py��������������������������������������������������������0000664�0000000�0000000�00000023262�14675634471�0020511�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import base64 import random import re import urllib.parse from .common import InfoExtractor from ..utils import ( ExtractorError, clean_html, join_nonempty, time_seconds, try_call, unified_timestamp, update_url_query, ) from ..utils.traversal import traverse_obj class RadikoBaseIE(InfoExtractor): _GEO_BYPASS = False _FULL_KEY = None _HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED = ( 'https://c-rpaa.smartstream.ne.jp', 'https://si-c-radiko.smartstream.ne.jp', 'https://tf-f-rpaa-radiko.smartstream.ne.jp', 'https://tf-c-rpaa-radiko.smartstream.ne.jp', 'https://si-f-radiko.smartstream.ne.jp', 'https://rpaa.smartstream.ne.jp', ) _HOSTS_FOR_TIME_FREE_FFMPEG_SUPPORTED = ( 'https://rd-wowza-radiko.radiko-cf.com', 'https://radiko.jp', 'https://f-radiko.smartstream.ne.jp', ) # Following URL forcibly connects not Time Free but Live _HOSTS_FOR_LIVE = ( 'https://c-radiko.smartstream.ne.jp', ) def _negotiate_token(self): _, auth1_handle = self._download_webpage_handle( 'https://radiko.jp/v2/api/auth1', None, 'Downloading authentication page', headers={ 'x-radiko-app': 'pc_html5', 'x-radiko-app-version': '0.0.1', 'x-radiko-device': 'pc', 'x-radiko-user': 'dummy_user', }) auth1_header = auth1_handle.headers auth_token = auth1_header['X-Radiko-AuthToken'] kl = int(auth1_header['X-Radiko-KeyLength']) ko = int(auth1_header['X-Radiko-KeyOffset']) raw_partial_key = self._extract_full_key()[ko:ko + kl] partial_key = base64.b64encode(raw_partial_key).decode() area_id = self._download_webpage( 'https://radiko.jp/v2/api/auth2', None, 'Authenticating', headers={ 'x-radiko-device': 'pc', 'x-radiko-user': 'dummy_user', 'x-radiko-authtoken': auth_token, 'x-radiko-partialkey': partial_key, }).split(',')[0] if area_id == 'OUT': self.raise_geo_restricted(countries=['JP']) auth_data = (auth_token, area_id) self.cache.store('radiko', 'auth_data', auth_data) return auth_data def _auth_client(self): 
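# Reuse a previously cached (auth_token, area_id) pair while the
# auth_check endpoint still accepts it; otherwise fall back to a full
# re-negotiation via _negotiate_token().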
cachedata = self.cache.load('radiko', 'auth_data') if cachedata is not None: response = self._download_webpage( 'https://radiko.jp/v2/api/auth_check', None, 'Checking cached token', expected_status=401, headers={'X-Radiko-AuthToken': cachedata[0], 'X-Radiko-AreaId': cachedata[1]}) if response == 'OK': return cachedata return self._negotiate_token() def _extract_full_key(self): if self._FULL_KEY: return self._FULL_KEY jscode = self._download_webpage( 'https://radiko.jp/apps/js/playerCommon.js', None, note='Downloading player js code') full_key = self._search_regex( (r"RadikoJSPlayer\([^,]*,\s*(['\"])pc_html5\1,\s*(['\"])(?P<fullkey>[0-9a-f]+)\2,\s*{"), jscode, 'full key', fatal=False, group='fullkey') if full_key: full_key = full_key.encode() else: # use only full key ever known full_key = b'bcd151073c03b352e1ef2fd66c32209da9ca0afa' self._FULL_KEY = full_key return full_key def _find_program(self, video_id, station, cursor): station_program = self._download_xml( f'https://radiko.jp/v3/program/station/weekly/{station}.xml', video_id, note=f'Downloading radio program for {station} station') prog = None for p in station_program.findall('.//prog'): ft_str, to_str = p.attrib['ft'], p.attrib['to'] ft = unified_timestamp(ft_str, False) to = unified_timestamp(to_str, False) if ft <= cursor < to: prog = p break if not prog: raise ExtractorError('Cannot identify radio program to download!') assert ft and to  # both boundary timestamps must have parsed return prog, station_program, ft, ft_str, to_str def _extract_formats(self, video_id, station, is_onair, ft, cursor, auth_token, area_id, query): m3u8_playlist_data = self._download_xml( f'https://radiko.jp/v3/station/stream/pc_html5/{station}.xml', video_id, note='Downloading stream information') formats = [] found = set() timefree_int = 0 if is_onair else 1 for element in m3u8_playlist_data.findall(f'.//url[@timefree="{timefree_int}"]/playlist_create_url'): pcu = element.text if pcu in found: continue found.add(pcu) playlist_url = update_url_query(pcu, { 'station_id': station, **query, 'l': '15', 'lsid': ''.join(random.choices('0123456789abcdef', k=32)), 'type': 'b', }) time_to_skip = None if is_onair else cursor - ft domain = urllib.parse.urlparse(playlist_url).netloc subformats = self._extract_m3u8_formats( playlist_url, video_id, ext='m4a', live=True, fatal=False, m3u8_id=domain, note=f'Downloading m3u8 information from {domain}', headers={ 'X-Radiko-AreaId': area_id, 'X-Radiko-AuthToken': auth_token, }) for sf in subformats: if (is_onair ^ pcu.startswith(self._HOSTS_FOR_LIVE)) or ( not is_onair and pcu.startswith(self._HOSTS_FOR_TIME_FREE_FFMPEG_UNSUPPORTED)): sf['preference'] = -100 sf['format_note'] = 'not preferred' if not is_onair and timefree_int == 1 and time_to_skip: sf['downloader_options'] = {'ffmpeg_args': ['-ss', str(time_to_skip)]} formats.extend(subformats) return formats def _extract_performers(self, prog): return traverse_obj(prog, ( 'pfm/text()', ..., {lambda x: re.split(r'[//、 ,,]', x)}, ..., {str.strip})) or None class RadikoIE(RadikoBaseIE): _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/ts/(?P<station>[A-Z0-9-]+)/(?P<timestring>\d+)' _TESTS = [{ # QRR (文化放送) station provides <desc> 'url': 'https://radiko.jp/#!/ts/QRR/20210425101300', 'only_matching': True, }, { # FMT (TOKYO FM) station does not provide <desc> 'url': 'https://radiko.jp/#!/ts/FMT/20210810150000', 'only_matching': True, }, { 'url': 'https://radiko.jp/#!/ts/JOAK-FM/20210509090000', 'only_matching': True, }] def _real_extract(self, url): station, timestring = 
self._match_valid_url(url).group('station', 'timestring') video_id = join_nonempty(station, timestring) vid_int = unified_timestamp(timestring, False) prog, station_program, ft, radio_begin, radio_end = self._find_program(video_id, station, vid_int) auth_token, area_id = self._auth_client() return { 'id': video_id, 'title': try_call(lambda: prog.find('title').text), 'cast': self._extract_performers(prog), 'description': clean_html(try_call(lambda: prog.find('info').text)), 'uploader': try_call(lambda: station_program.find('.//name').text), 'uploader_id': station, 'timestamp': vid_int, 'duration': try_call(lambda: unified_timestamp(radio_end, False) - unified_timestamp(radio_begin, False)), 'is_live': True, 'formats': self._extract_formats( video_id=video_id, station=station, is_onair=False, ft=ft, cursor=vid_int, auth_token=auth_token, area_id=area_id, query={ 'start_at': radio_begin, 'ft': radio_begin, 'end_at': radio_end, 'to': radio_end, 'seek': timestring, }, ), } class RadikoRadioIE(RadikoBaseIE): _VALID_URL = r'https?://(?:www\.)?radiko\.jp/#!/live/(?P<id>[A-Z0-9-]+)' _TESTS = [{ # QRR (文化放送) station provides <desc> 'url': 'https://radiko.jp/#!/live/QRR', 'only_matching': True, }, { # FMT (TOKYO FM) station does not provide <desc> 'url': 'https://radiko.jp/#!/live/FMT', 'only_matching': True, }, { 'url': 'https://radiko.jp/#!/live/JOAK-FM', 'only_matching': True, }] def _real_extract(self, url): station = self._match_id(url) self.report_warning('Downloader will not stop at the end of the program! Press Ctrl+C to stop') auth_token, area_id = self._auth_client() # get current time in JST (GMT+9:00 w/o DST) vid_now = time_seconds(hours=9) prog, station_program, ft, _, _ = self._find_program(station, station, vid_now) title = prog.find('title').text description = clean_html(prog.find('info').text) station_name = station_program.find('.//name').text formats = self._extract_formats( video_id=station, station=station, is_onair=True, ft=ft, cursor=vid_now, auth_token=auth_token, area_id=area_id, query={}) return { 'id': station, 'title': title, 'cast': self._extract_performers(prog), 'description': description, 'uploader': station_name, 'uploader_id': station, 'timestamp': ft, 'formats': formats, 'is_live': True, } ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/radiocanada.py���������������������������������������������������0000664�0000000�0000000�00000014147�14675634471�0021470�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, determine_ext, int_or_none, unified_strdate, ) class RadioCanadaIE(InfoExtractor): IE_NAME = 'radiocanada' _VALID_URL = r'(?:radiocanada:|https?://ici\.radio-canada\.ca/widgets/mediaconsole/)(?P<app_code>[^:/]+)[:/](?P<id>[0-9]+)' _TESTS = [ { 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7184272', 'info_dict': { 
'id': '7184272', 'ext': 'mp4', 'title': 'Le parcours du tireur capté sur vidéo', 'description': 'Images des caméras de surveillance fournies par la GRC montrant le parcours du tireur d\'Ottawa', 'upload_date': '20141023', }, 'params': { # m3u8 download 'skip_download': True, }, }, { # empty Title 'url': 'http://ici.radio-canada.ca/widgets/mediaconsole/medianet/7754998/', 'info_dict': { 'id': '7754998', 'ext': 'mp4', 'title': 'letelejournal22h', 'description': 'INTEGRALE WEB 22H-TJ', 'upload_date': '20170720', }, 'params': { # m3u8 download 'skip_download': True, }, }, { # with protectionType but not actually DRM protected 'url': 'radiocanada:toutv:140872', 'info_dict': { 'id': '140872', 'title': 'Épisode 1', 'series': 'District 31', }, 'only_matching': True, }, ] _GEO_COUNTRIES = ['CA'] _access_token = None _claims = None def _call_api(self, path, video_id=None, app_code=None, query=None): if not query: query = {} query.update({ 'client_key': '773aea60-0e80-41bb-9c7f-e6d7c3ad17fb', 'output': 'json', }) if video_id: query.update({ 'appCode': app_code, 'idMedia': video_id, }) if self._access_token: query['access_token'] = self._access_token try: return self._download_json( 'https://services.radio-canada.ca/media/' + path, video_id, query=query) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status in (401, 422): data = self._parse_json(e.cause.response.read().decode(), None) error = data.get('error_description') or data['errorMessage']['text'] raise ExtractorError(error, expected=True) raise def _extract_info(self, app_code, video_id): metas = self._call_api('meta/v1/index.ashx', video_id, app_code)['Metas'] def get_meta(name): for meta in metas: if meta.get('name') == name: text = meta.get('text') if text: return text # protectionType does not necessarily mean the video is DRM protected (see # https://github.com/ytdl-org/youtube-dl/pull/18609). 
if get_meta('protectionType'): self.report_warning('This video is probably DRM protected.') query = { 'connectionType': 'hd', 'deviceType': 'ipad', 'multibitrate': 'true', } if self._claims: query['claims'] = self._claims v_data = self._call_api('validation/v2/', video_id, app_code, query) v_url = v_data.get('url') if not v_url: error = v_data['message'] if error == "Le contenu sélectionné n'est pas disponible dans votre pays": self.raise_geo_restricted(error, self._GEO_COUNTRIES) if error == 'Le contenu sélectionné est disponible seulement en premium': self.raise_login_required(error) raise ExtractorError( f'{self.IE_NAME} said: {error}', expected=True) formats = self._extract_m3u8_formats(v_url, video_id, 'mp4') subtitles = {} closed_caption_url = get_meta('closedCaption') or get_meta('closedCaptionHTML5') if closed_caption_url: subtitles['fr'] = [{ 'url': closed_caption_url, 'ext': determine_ext(closed_caption_url, 'vtt'), }] return { 'id': video_id, 'title': get_meta('Title') or get_meta('AV-nomEmission'), 'description': get_meta('Description') or get_meta('ShortDescription'), 'thumbnail': get_meta('imageHR') or get_meta('imageMR') or get_meta('imageBR'), 'duration': int_or_none(get_meta('length')), 'series': get_meta('Emission'), 'season_number': int_or_none(get_meta('SrcSaison')), 'episode_number': int_or_none(get_meta('SrcEpisode')), 'upload_date': unified_strdate(get_meta('Date')), 'subtitles': subtitles, 'formats': formats, } def _real_extract(self, url): return self._extract_info(*self._match_valid_url(url).groups()) class RadioCanadaAudioVideoIE(InfoExtractor): IE_NAME = 'radiocanada:audiovideo' _VALID_URL = r'https?://ici\.radio-canada\.ca/([^/]+/)*media-(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://ici.radio-canada.ca/audio-video/media-7527184/barack-obama-au-vietnam', 'info_dict': { 'id': '7527184', 'ext': 'mp4', 'title': 'Barack Obama au Vietnam', 'description': 'Les États-Unis lèvent l\'embargo sur la vente d\'armes qui datait de la guerre du Vietnam', 'upload_date': '20160523', }, 'params': { # m3u8 download 'skip_download': True, }, }, { 'url': 'https://ici.radio-canada.ca/info/videos/media-7527184/barack-obama-au-vietnam', 'only_matching': True, }] def _real_extract(self, url): return self.url_result(f'radiocanada:medianet:{self._match_id(url)}') �������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/radiocomercial.py������������������������������������������������0000664�0000000�0000000�00000014165�14675634471�0022217�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import itertools from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, extract_attributes, get_element_by_class, get_element_html_by_class, get_element_text_and_html_by_tag, get_elements_html_by_class, int_or_none, join_nonempty, try_call, unified_strdate, update_url, urljoin, ) 
from ..utils.traversal import traverse_obj class RadioComercialIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/[^/?#]+/t?(?P<season>\d+)/(?P<id>[\w-]+)' _TESTS = [{ 'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao/t6/taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas#page-content-wrapper', 'md5': '5f4fe8e485b29d2e8fd495605bc2c7e4', 'info_dict': { 'id': 'taylor-swift-entranhando-se-que-nem-uma-espada-no-ventre-dos-fas', 'ext': 'mp3', 'title': 'Taylor Swift entranhando-se que nem uma espada no ventre dos fãs.', 'release_date': '20231025', 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg', 'season': 'Season 6', 'season_number': 6, }, }, { 'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3/convenca-me-num-minuto-que-os-lobisomens-existem', 'md5': '47e96c273aef96a8eb160cd6cf46d782', 'info_dict': { 'id': 'convenca-me-num-minuto-que-os-lobisomens-existem', 'ext': 'mp3', 'title': 'Convença-me num minuto que os lobisomens existem', 'release_date': '20231026', 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg', 'season': 'Season 3', 'season_number': 3, }, }, { 'url': 'https://radiocomercial.pt/podcasts/inacreditavel-by-ines-castel-branco/t2/o-desastre-de-aviao', 'md5': '69be64255420fec23b7259955d771e54', 'info_dict': { 'id': 'o-desastre-de-aviao', 'ext': 'mp3', 'title': 'O desastre de avião', 'description': 'md5:8a82beeb372641614772baab7246245f', 'release_date': '20231101', 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg', 'season': 'Season 2', 'season_number': 2, }, 'params': { # inconsistent md5 'skip_download': True, }, }, { 'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/2023/t-n-t-29-de-outubro', 'md5': '91d32d4d4b1407272068b102730fc9fa', 'info_dict': { 'id': 't-n-t-29-de-outubro', 'ext': 'mp3', 'title': 'T.N.T 29 de outubro', 'release_date': '20231029', 'thumbnail': r're:https://radiocomercial.pt/upload/[^.]+.jpg', 'season': 'Season 2023', 'season_number': 2023, }, }] def _real_extract(self, url): video_id, season = self._match_valid_url(url).group('id', 'season') webpage = self._download_webpage(url, video_id) return { 'id': video_id, 'title': self._html_extract_title(webpage), 'description': self._og_search_description(webpage, default=None), 'release_date': unified_strdate(get_element_by_class( 'date', get_element_html_by_class('descriptions', webpage) or '')), 'thumbnail': self._og_search_thumbnail(webpage), 'season_number': int_or_none(season), 'url': extract_attributes(get_element_html_by_class('audiofile', webpage) or '').get('href'), } class RadioComercialPlaylistIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?radiocomercial\.pt/podcasts/(?P<id>[\w-]+)(?:/t?(?P<season>\d+))?/?(?:$|[?#])' _TESTS = [{ 'url': 'https://radiocomercial.pt/podcasts/convenca-me-num-minuto/t3', 'info_dict': { 'id': 'convenca-me-num-minuto_t3', 'title': 'Convença-me num Minuto - Temporada 3', }, 'playlist_mincount': 32, }, { 'url': 'https://radiocomercial.pt/podcasts/o-homem-que-mordeu-o-cao', 'info_dict': { 'id': 'o-homem-que-mordeu-o-cao', 'title': 'O Homem Que Mordeu o Cão', }, 'playlist_mincount': 19, }, { 'url': 'https://radiocomercial.pt/podcasts/as-minhas-coisas-favoritas', 'info_dict': { 'id': 'as-minhas-coisas-favoritas', 'title': 'As Minhas Coisas Favoritas', }, 'playlist_mincount': 131, }, { 'url': 'https://radiocomercial.pt/podcasts/tnt-todos-no-top/t2023', 'info_dict': { 'id': 'tnt-todos-no-top_t2023', 'title': 'TNT - Todos No Top - Temporada 2023', }, 
'playlist_mincount': 39, }] def _entries(self, url, playlist_id): for page in itertools.count(1): try: webpage = self._download_webpage( f'{url}/{page}', playlist_id, f'Downloading page {page}') except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 404: break raise episodes = get_elements_html_by_class('tm-ouvir-podcast', webpage) if not episodes: break for url_path in traverse_obj(episodes, (..., {extract_attributes}, 'href')): episode_url = urljoin(url, url_path) if RadioComercialIE.suitable(episode_url): yield episode_url def _real_extract(self, url): podcast, season = self._match_valid_url(url).group('id', 'season') playlist_id = join_nonempty(podcast, season, delim='_t') url = update_url(url, query=None, fragment=None) webpage = self._download_webpage(url, playlist_id) name = try_call(lambda: get_element_text_and_html_by_tag('h1', webpage)[0]) title = name if name == season else join_nonempty(name, season, delim=' - Temporada ') return self.playlist_from_matches( self._entries(url, playlist_id), playlist_id, title, ie=RadioComercialIE) �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/radiode.py�������������������������������������������������������0000664�0000000�0000000�00000003324�14675634471�0020644�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor class RadioDeIE(InfoExtractor): _WORKING = False IE_NAME = 'radio.de' _VALID_URL = r'https?://(?P<id>.+?)\.(?:radio\.(?:de|at|fr|pt|es|pl|it)|rad\.io)' _TEST = { 'url': 'http://ndr2.radio.de/', 'info_dict': { 'id': 'ndr2', 'ext': 'mp3', 'title': 're:^NDR 2 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': 'md5:591c49c702db1a33751625ebfb67f273', 'thumbnail': r're:^https?://.*\.png', 'is_live': True, }, 'params': { 'skip_download': True, }, } def _real_extract(self, url): radio_id = self._match_id(url) webpage = self._download_webpage(url, radio_id) jscode = self._search_regex( r"'components/station/stationService':\s*\{\s*'?station'?:\s*(\{.*?\s*\}),\n", webpage, 'broadcast') broadcast = self._parse_json(jscode, radio_id) title = broadcast['name'] description = broadcast.get('description') or broadcast.get('shortDescription') thumbnail = broadcast.get('picture4Url') or broadcast.get('picture4TransUrl') or broadcast.get('logo100x100') formats = [{ 'url': stream['streamUrl'], 'ext': stream['streamContentFormat'].lower(), 'acodec': stream['streamContentFormat'], 'abr': stream['bitRate'], 'asr': stream['sampleRate'], } for stream in broadcast['streamUrls']] return { 'id': radio_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'is_live': True, 'formats': formats, } 
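# For reference, a minimal sketch of the station object the regex above
# pulls out of the page (key names mirror the lookups in _real_extract;
# the values are invented for illustration):
#
#   'components/station/stationService': {
#       'station': {"name": "NDR 2", "description": "...",
#                   "picture4Url": "https://.../logo.png",
#                   "streamUrls": [{"streamUrl": "https://.../ndr2.mp3",
#                                   "streamContentFormat": "MP3",
#                                   "bitRate": 128, "sampleRate": 44100}]},
#       ...
#   },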
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/radiofrance.py���������������������������������������������������0000664�0000000�0000000�00000043460�14675634471�0021517�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import itertools import re import urllib.parse from .common import InfoExtractor from ..utils import ( int_or_none, join_nonempty, js_to_json, parse_duration, strftime_or_none, traverse_obj, unified_strdate, urljoin, ) class RadioFranceIE(InfoExtractor): _VALID_URL = r'https?://maison\.radiofrance\.fr/radiovisions/(?P<id>[^?#]+)' IE_NAME = 'radiofrance' _TEST = { 'url': 'http://maison.radiofrance.fr/radiovisions/one-one', 'md5': 'bdbb28ace95ed0e04faab32ba3160daf', 'info_dict': { 'id': 'one-one', 'ext': 'ogg', 'title': 'One to one', 'description': "Plutôt que d'imaginer la radio de demain comme technologie ou comme création de contenu, je veux montrer que quelles que soient ses évolutions, j'ai l'intime conviction que la radio continuera d'être un grand média de proximité pour les auditeurs.", 'uploader': 'Thomas Hercouët', }, } def _real_extract(self, url): m = self._match_valid_url(url) video_id = m.group('id') webpage = self._download_webpage(url, video_id) title = self._html_search_regex(r'<h1>(.*?)</h1>', webpage, 'title') description = self._html_search_regex( r'<div class="bloc_page_wrapper"><div class="text">(.*?)</div>', webpage, 'description', fatal=False) uploader = self._html_search_regex( r'<div class="credit">  © (.*?)</div>', webpage, 'uploader', fatal=False) formats_str = self._html_search_regex( r'class="jp-jplayer[^"]*" data-source="([^"]+)">', webpage, 'audio URLs') formats = [ { 'format_id': fm[0], 'url': fm[1], 'vcodec': 'none', 'quality': i, } for i, fm in enumerate(re.findall(r"([a-z0-9]+)\s*:\s*'([^']+)'", formats_str)) ] return { 'id': video_id, 'title': title, 'formats': formats, 'description': description, 'uploader': uploader, } class RadioFranceBaseIE(InfoExtractor): _VALID_URL_BASE = r'https?://(?:www\.)?radiofrance\.fr' _STATIONS_RE = '|'.join(map(re.escape, ( 'franceculture', 'franceinfo', 'franceinter', 'francemusique', 'fip', 'mouv', ))) def _extract_data_from_webpage(self, webpage, display_id, key): return traverse_obj(self._search_json( r'\bconst\s+data\s*=', webpage, key, display_id, contains_pattern=r'\[\{(?s:.+)\}\]', transform_source=js_to_json), (..., 'data', key, {dict}), get_all=False) or {} class FranceCultureIE(RadioFranceBaseIE): _VALID_URL = rf'''(?x) {RadioFranceBaseIE._VALID_URL_BASE} /(?:{RadioFranceBaseIE._STATIONS_RE}) /podcasts/(?:[^?#]+/)?(?P<display_id>[^?#]+)-(?P<id>\d{{6,}})(?:$|[?#]) ''' _TESTS = [ { 'url': 'https://www.radiofrance.fr/franceculture/podcasts/science-en-questions/la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau-8440487', 'info_dict': { 'id': '8440487', 'display_id': 'la-physique-d-einstein-aiderait-elle-a-comprendre-le-cerveau', 'ext': 'mp3', 'title': 'La physique d’Einstein aiderait-elle à comprendre le 
cerveau ?', 'description': 'Existerait-il un pont conceptuel entre la physique de l’espace-temps et les neurosciences ?', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'upload_date': '20220514', 'duration': 2750, }, }, { 'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9-30/le-7-9-30-du-vendredi-10-mars-2023-2107675', 'info_dict': { 'id': '2107675', 'display_id': 'le-7-9-30-du-vendredi-10-mars-2023', 'title': 'Inflation alimentaire : comment en sortir ? - Régis Debray et Claude Grange - Cybèle Idelot', 'description': 'md5:36ee74351ede77a314fdebb94026b916', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'upload_date': '20230310', 'duration': 8977, 'ext': 'mp3', }, }, { 'url': 'https://www.radiofrance.fr/franceinter/podcasts/la-rafle-du-vel-d-hiv-une-affaire-d-etat/les-racines-du-crime-episode-1-3715507', 'only_matching': True, }, { 'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-sciences/sante-bientot-un-vaccin-contre-l-asthme-allergique-3057200', 'only_matching': True, }, ] def _real_extract(self, url): video_id, display_id = self._match_valid_url(url).group('id', 'display_id') webpage = self._download_webpage(url, display_id) # _search_json_ld doesn't correctly handle this. See https://github.com/yt-dlp/yt-dlp/pull/3874#discussion_r891903846 video_data = self._search_json('', webpage, 'audio data', display_id, contains_pattern=r'{\s*"@type"\s*:\s*"AudioObject".+}') return { 'id': video_id, 'display_id': display_id, 'url': video_data['contentUrl'], 'vcodec': 'none' if video_data.get('encodingFormat') == 'mp3' else None, 'duration': parse_duration(video_data.get('duration')), 'title': self._html_search_regex(r'(?s)<h1[^>]*itemprop="[^"]*name[^"]*"[^>]*>(.+?)</h1>', webpage, 'title', default=self._og_search_title(webpage)), 'description': self._html_search_regex( r'(?s)<meta name="description"\s*content="([^"]+)', webpage, 'description', default=None), 'thumbnail': self._og_search_thumbnail(webpage), 'uploader': self._html_search_regex( r'(?s)<span class="author">(.*?)</span>', webpage, 'uploader', default=None), 'upload_date': unified_strdate(self._search_regex( r'"datePublished"\s*:\s*"([^"]+)', webpage, 'timestamp', fatal=False)), } class RadioFranceLiveIE(RadioFranceBaseIE): _VALID_URL = rf'''(?x) https?://(?:www\.)?radiofrance\.fr /(?P<id>{RadioFranceBaseIE._STATIONS_RE}) /?(?P<substation_id>radio-[\w-]+)?(?:[#?]|$) ''' _TESTS = [{ 'url': 'https://www.radiofrance.fr/franceinter/', 'info_dict': { 'id': 'franceinter', 'title': str, 'live_status': 'is_live', 'ext': 'aac', }, 'params': { 'skip_download': 'Livestream', }, }, { 'url': 'https://www.radiofrance.fr/franceculture', 'info_dict': { 'id': 'franceculture', 'title': str, 'live_status': 'is_live', 'ext': 'aac', }, 'params': { 'skip_download': 'Livestream', }, }, { 'url': 'https://www.radiofrance.fr/mouv/radio-musique-kids-family', 'info_dict': { 'id': 'mouv-radio-musique-kids-family', 'title': str, 'live_status': 'is_live', 'ext': 'aac', }, 'params': { 'skip_download': 'Livestream', }, }, { 'url': 'https://www.radiofrance.fr/mouv/radio-rnb-soul', 'info_dict': { 'id': 'mouv-radio-rnb-soul', 'title': str, 'live_status': 'is_live', 'ext': 'aac', }, 'params': { 'skip_download': 'Livestream', }, }, { 'url': 'https://www.radiofrance.fr/mouv/radio-musique-mix', 'info_dict': { 'id': 'mouv-radio-musique-mix', 'title': str, 'live_status': 'is_live', 'ext': 'aac', }, 'params': { 'skip_download': 'Livestream', }, }, { 'url': 'https://www.radiofrance.fr/fip/radio-rock', 'info_dict': { 'id': 'fip-radio-rock', 'title': 
str, 'live_status': 'is_live', 'ext': 'aac', }, 'params': { 'skip_download': 'Livestream', }, }, { 'url': 'https://www.radiofrance.fr/mouv', 'only_matching': True, }] def _real_extract(self, url): station_id, substation_id = self._match_valid_url(url).group('id', 'substation_id') if substation_id: webpage = self._download_webpage(url, station_id) api_response = self._extract_data_from_webpage(webpage, station_id, 'webRadioData') else: api_response = self._download_json( f'https://www.radiofrance.fr/{station_id}/api/live', station_id) formats, subtitles = [], {} for media_source in traverse_obj(api_response, (('now', None), 'media', 'sources', lambda _, v: v['url'])): if media_source.get('format') == 'hls': fmts, subs = self._extract_m3u8_formats_and_subtitles(media_source['url'], station_id, fatal=False) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) else: formats.append({ 'url': media_source['url'], 'abr': media_source.get('bitrate'), }) return { 'id': join_nonempty(station_id, substation_id), 'title': traverse_obj(api_response, ('visual', 'legend')) or join_nonempty( ('now', 'firstLine', 'title'), ('now', 'secondLine', 'title'), from_dict=api_response, delim=' - '), 'formats': formats, 'subtitles': subtitles, 'is_live': True, } class RadioFrancePlaylistBaseIE(RadioFranceBaseIE): """Subclasses must set _METADATA_KEY""" def _call_api(self, content_id, cursor, page_num): raise NotImplementedError('This method must be implemented by subclasses') def _generate_playlist_entries(self, content_id, content_response): for page_num in itertools.count(2): for entry in content_response['items']: yield self.url_result( f'https://www.radiofrance.fr/{entry["path"]}', url_transparent=True, **traverse_obj(entry, { 'title': 'title', 'description': 'standFirst', 'timestamp': ('publishedDate', {int_or_none}), 'thumbnail': ('visual', 'src'), })) next_cursor = traverse_obj(content_response, (('pagination', None), 'next'), get_all=False) if not next_cursor: break content_response = self._call_api(content_id, next_cursor, page_num) def _real_extract(self, url): display_id = self._match_id(url) metadata = self._download_json( 'https://www.radiofrance.fr/api/v2.1/path', display_id, query={'value': urllib.parse.urlparse(url).path})['content'] content_id = metadata['id'] return self.playlist_result( self._generate_playlist_entries(content_id, metadata[self._METADATA_KEY]), content_id, display_id=display_id, **{**traverse_obj(metadata, { 'title': 'title', 'description': 'standFirst', 'thumbnail': ('visual', 'src'), }), **traverse_obj(metadata, { 'title': 'name', 'description': 'role', })}) class RadioFrancePodcastIE(RadioFrancePlaylistBaseIE): _VALID_URL = rf'''(?x) {RadioFranceBaseIE._VALID_URL_BASE} /(?:{RadioFranceBaseIE._STATIONS_RE}) /podcasts/(?P<id>[\w-]+)/?(?:[?#]|$) ''' _TESTS = [{ 'url': 'https://www.radiofrance.fr/franceinfo/podcasts/le-billet-vert', 'info_dict': { 'id': 'eaf6ef81-a980-4f1c-a7d1-8a75ecd54b17', 'display_id': 'le-billet-vert', 'title': 'Le billet sciences', 'description': 'md5:eb1007b34b0c0a680daaa71525bbd4c1', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', }, 'playlist_mincount': 11, }, { 'url': 'https://www.radiofrance.fr/franceinter/podcasts/jean-marie-le-pen-l-obsession-nationale', 'info_dict': { 'id': '566fd524-3074-4fbc-ac69-8696f2152a54', 'display_id': 'jean-marie-le-pen-l-obsession-nationale', 'title': 'Jean-Marie Le Pen, l\'obsession nationale', 'description': 'md5:a07c0cfb894f6d07a62d0ad12c4b7d73', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', }, 
'playlist_count': 7, }, { 'url': 'https://www.radiofrance.fr/franceculture/podcasts/serie-thomas-grjebine', 'info_dict': { 'id': '63c1ddc9-9f15-457a-98b2-411bac63f48d', 'display_id': 'serie-thomas-grjebine', 'title': 'Thomas Grjebine', }, 'playlist_count': 1, }, { 'url': 'https://www.radiofrance.fr/fip/podcasts/certains-l-aiment-fip', 'info_dict': { 'id': '143dff38-e956-4a5d-8576-1c0b7242b99e', 'display_id': 'certains-l-aiment-fip', 'title': 'Certains l’aiment Fip', 'description': 'md5:ff974672ba00d4fd5be80fb001c5b27e', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', }, 'playlist_mincount': 321, }, { 'url': 'https://www.radiofrance.fr/franceinter/podcasts/le-7-9', 'only_matching': True, }, { 'url': 'https://www.radiofrance.fr/mouv/podcasts/dirty-mix', 'only_matching': True, }] _METADATA_KEY = 'expressions' def _call_api(self, podcast_id, cursor, page_num): return self._download_json( f'https://www.radiofrance.fr/api/v2.1/concepts/{podcast_id}/expressions', podcast_id, note=f'Downloading page {page_num}', query={'pageCursor': cursor}) class RadioFranceProfileIE(RadioFrancePlaylistBaseIE): _VALID_URL = rf'{RadioFranceBaseIE._VALID_URL_BASE}/personnes/(?P<id>[\w-]+)' _TESTS = [{ 'url': 'https://www.radiofrance.fr/personnes/thomas-pesquet?p=3', 'info_dict': { 'id': '86c62790-e481-11e2-9f7b-782bcb6744eb', 'display_id': 'thomas-pesquet', 'title': 'Thomas Pesquet', 'description': 'Astronaute à l\'agence spatiale européenne', }, 'playlist_mincount': 212, }, { 'url': 'https://www.radiofrance.fr/personnes/eugenie-bastie', 'info_dict': { 'id': '9593050b-0183-4972-a0b5-d8f699079e02', 'display_id': 'eugenie-bastie', 'title': 'Eugénie Bastié', 'description': 'Journaliste et essayiste', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', }, 'playlist_mincount': 39, }, { 'url': 'https://www.radiofrance.fr/personnes/lea-salame', 'only_matching': True, }] _METADATA_KEY = 'documents' def _call_api(self, profile_id, cursor, page_num): resp = self._download_json( f'https://www.radiofrance.fr/api/v2.1/taxonomy/{profile_id}/documents', profile_id, note=f'Downloading page {page_num}', query={ 'relation': 'personality', 'cursor': cursor, }) resp['next'] = traverse_obj(resp, ('pagination', 'next')) return resp class RadioFranceProgramScheduleIE(RadioFranceBaseIE): _VALID_URL = rf'''(?x) {RadioFranceBaseIE._VALID_URL_BASE} /(?P<station>{RadioFranceBaseIE._STATIONS_RE}) /grille-programmes(?:\?date=(?P<date>[\d-]+))? 
''' _TESTS = [{ 'url': 'https://www.radiofrance.fr/franceinter/grille-programmes?date=17-02-2023', 'info_dict': { 'id': 'franceinter-program-20230217', 'upload_date': '20230217', }, 'playlist_count': 25, }, { 'url': 'https://www.radiofrance.fr/franceculture/grille-programmes?date=01-02-2023', 'info_dict': { 'id': 'franceculture-program-20230201', 'upload_date': '20230201', }, 'playlist_count': 25, }, { 'url': 'https://www.radiofrance.fr/mouv/grille-programmes?date=19-03-2023', 'info_dict': { 'id': 'mouv-program-20230319', 'upload_date': '20230319', }, 'playlist_count': 3, }, { 'url': 'https://www.radiofrance.fr/francemusique/grille-programmes?date=18-03-2023', 'info_dict': { 'id': 'francemusique-program-20230318', 'upload_date': '20230318', }, 'playlist_count': 15, }, { 'url': 'https://www.radiofrance.fr/franceculture/grille-programmes', 'only_matching': True, }] def _generate_playlist_entries(self, webpage_url, api_response): for entry in traverse_obj(api_response, ('steps', lambda _, v: v['expression']['path'])): yield self.url_result( urljoin(webpage_url, f'/{entry["expression"]["path"]}'), ie=FranceCultureIE, url_transparent=True, **traverse_obj(entry, { 'title': ('expression', 'title'), 'thumbnail': ('expression', 'visual', 'src'), 'timestamp': ('startTime', {int_or_none}), 'series_id': ('concept', 'id'), 'series': ('concept', 'title'), })) def _real_extract(self, url): station, date = self._match_valid_url(url).group('station', 'date') webpage = self._download_webpage(url, station) grid_data = self._extract_data_from_webpage(webpage, station, 'grid') upload_date = strftime_or_none(grid_data.get('date'), '%Y%m%d') return self.playlist_result( self._generate_playlist_entries(url, grid_data), join_nonempty(station, 'program', upload_date), upload_date=upload_date) ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/radiojavan.py����������������������������������������������������0000664�0000000�0000000�00000005222�14675634471�0021352�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re from .common import InfoExtractor from ..utils import ( parse_resolution, str_to_int, unified_strdate, urlencode_postdata, urljoin, ) class RadioJavanIE(InfoExtractor): _WORKING = False _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?' 
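# Playback hosts are assigned per request: _real_extract below first POSTs
# the video id to /videos/video_host and joins the returned host with the
# relative paths scraped from the page's RJ.video<quality> assignments.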
_TEST = { 'url': 'http://www.radiojavan.com/videos/video/chaartaar-ashoobam', 'md5': 'e85208ffa3ca8b83534fca9fe19af95b', 'info_dict': { 'id': 'chaartaar-ashoobam', 'ext': 'mp4', 'title': 'Chaartaar - Ashoobam', 'thumbnail': r're:^https?://.*\.jpe?g$', 'upload_date': '20150215', 'view_count': int, 'like_count': int, 'dislike_count': int, }, } def _real_extract(self, url): video_id = self._match_id(url) download_host = self._download_json( 'https://www.radiojavan.com/videos/video_host', video_id, data=urlencode_postdata({'id': video_id}), headers={ 'Content-Type': 'application/x-www-form-urlencoded', 'Referer': url, }).get('host', 'https://host1.rjmusicmedia.com') webpage = self._download_webpage(url, video_id) formats = [] for format_id, _, video_path in re.findall( r'RJ\.video(?P<format_id>\d+[pPkK])\s*=\s*(["\'])(?P<url>(?:(?!\2).)+)\2', webpage): f = parse_resolution(format_id) f.update({ 'url': urljoin(download_host, video_path), 'format_id': format_id, }) formats.append(f) title = self._og_search_title(webpage) thumbnail = self._og_search_thumbnail(webpage) upload_date = unified_strdate(self._search_regex( r'class="date_added">Date added: ([^<]+)<', webpage, 'upload date', fatal=False)) view_count = str_to_int(self._search_regex( r'class="views">Plays: ([\d,]+)', webpage, 'view count', fatal=False)) like_count = str_to_int(self._search_regex( r'class="rating">([\d,]+) likes', webpage, 'like count', fatal=False)) dislike_count = str_to_int(self._search_regex( r'class="rating">([\d,]+) dislikes', webpage, 'dislike count', fatal=False)) return { 'id': video_id, 'title': title, 'thumbnail': thumbnail, 'upload_date': upload_date, 'view_count': view_count, 'like_count': like_count, 'dislike_count': dislike_count, 'formats': formats, } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/radiokapital.py��������������������������������������������������0000664�0000000�0000000�00000006423�14675634471�0021704�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import itertools import urllib.parse from .common import InfoExtractor from ..utils import clean_html, traverse_obj, unescapeHTML class RadioKapitalBaseIE(InfoExtractor): def _call_api(self, resource, video_id, note='Downloading JSON metadata', qs={}): return self._download_json( f'https://www.radiokapital.pl/wp-json/kapital/v1/{resource}?{urllib.parse.urlencode(qs)}', video_id, note=note) def _parse_episode(self, data): release = '{}{}{}'.format(data['published'][6:11], data['published'][3:6], data['published'][:3]) return { '_type': 'url_transparent', 'url': data['mixcloud_url'], 'ie_key': 'Mixcloud', 'title': unescapeHTML(data['title']), 'description': clean_html(data.get('content')), 'tags': traverse_obj(data, ('tags', ..., 'name')), 'release_date': release, 'series': traverse_obj(data, ('show', 'title')), } class RadioKapitalIE(RadioKapitalBaseIE): IE_NAME = 'radiokapital' _VALID_URL = 
r'https?://(?:www\.)?radiokapital\.pl/shows/[a-z\d-]+/(?P<id>[a-z\d-]+)' _TESTS = [{ 'url': 'https://radiokapital.pl/shows/tutaj-sa-smoki/5-its-okay-to-be-immaterial', 'info_dict': { 'id': 'radiokapital_radio-kapitał-tutaj-są-smoki-5-its-okay-to-be-immaterial-2021-05-20', 'ext': 'm4a', 'title': '#5: It’s okay to\xa0be\xa0immaterial', 'description': 'md5:2499da5fbfb0e88333b7d37ec8e9e4c4', 'uploader': 'Radio Kapitał', 'uploader_id': 'radiokapital', 'timestamp': 1621640164, 'upload_date': '20210521', }, }] def _real_extract(self, url): video_id = self._match_id(url) episode = self._call_api(f'episodes/{video_id}', video_id) return self._parse_episode(episode) class RadioKapitalShowIE(RadioKapitalBaseIE): IE_NAME = 'radiokapital:show' _VALID_URL = r'https?://(?:www\.)?radiokapital\.pl/shows/(?P<id>[a-z\d-]+)/?(?:$|[?#])' _TESTS = [{ 'url': 'https://radiokapital.pl/shows/wesz', 'info_dict': { 'id': '100', 'title': 'WĘSZ', 'description': 'md5:3a557a1e0f31af612b0dcc85b1e0ca5c', }, 'playlist_mincount': 17, }] def _get_episode_list(self, series_id, page_no): return self._call_api( 'episodes', series_id, f'Downloading episode list page #{page_no}', qs={ 'show': series_id, 'page': page_no, }) def _entries(self, series_id): for page_no in itertools.count(1): episode_list = self._get_episode_list(series_id, page_no) yield from (self._parse_episode(ep) for ep in episode_list['items']) if episode_list['next'] is None: break def _real_extract(self, url): series_id = self._match_id(url) show = self._call_api(f'shows/{series_id}', series_id, 'Downloading show metadata') entries = self._entries(series_id) return { '_type': 'playlist', 'entries': entries, 'id': str(show['id']), 'title': show.get('title'), 'description': clean_html(show.get('content')), } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/radiozet.py������������������������������������������������������0000664�0000000�0000000�00000004204�14675634471�0021054�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( strip_or_none, traverse_obj, ) class RadioZetPodcastIE(InfoExtractor): _VALID_URL = r'https?://player\.radiozet\.pl\/Podcasty/.*?/(?P<id>.+)' _TEST = { 'url': 'https://player.radiozet.pl/Podcasty/Nie-Ma-Za-Co/O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu', 'md5': 'e03665c316b4fbc5f6a8f232948bbba3', 'info_dict': { 'id': '42154', 'display_id': 'O-przedmiotach-szkolnych-ktore-przydaja-sie-w-zyciu', 'title': 'O przedmiotach szkolnych, które przydają się w życiu', 'description': 'md5:fa72bed49da334b09e5b2f79851f185c', 'release_timestamp': 1592985480, 'ext': 'mp3', 'thumbnail': r're:^https?://.*\.png$', 'duration': 83, 'series': 'Nie Ma Za Co', 'creator': 'Katarzyna Pakosińska', }, } def _call_api(self, podcast_id, display_id): return self._download_json( f'https://player.radiozet.pl/api/podcasts/getPodcast/(node)/{podcast_id}/(station)/radiozet', display_id) def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) 
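# The numeric podcast node id is embedded as the data-id attribute of the
# page's "player" div; it keys the getPodcast API call that follows.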
podcast_id = self._html_search_regex(r'<div.*?\sid="player".*?\sdata-id=[\'"]([^\'"]+)[\'"]', webpage, 'podcast id') data = self._call_api(podcast_id, display_id)['data'][0] return { 'id': podcast_id, 'display_id': display_id, 'title': strip_or_none(data.get('title')), 'description': strip_or_none(traverse_obj(data, ('program', 'desc'))), 'release_timestamp': data.get('published_date'), 'url': traverse_obj(data, ('player', 'stream')), 'thumbnail': traverse_obj(data, ('program', 'image', 'original')), 'duration': traverse_obj(data, ('player', 'duration')), 'series': strip_or_none(traverse_obj(data, ('program', 'title'))), 'creator': strip_or_none(traverse_obj(data, ('presenter', 0, 'title'))), }
yt-dlp-2024.09.27/yt_dlp/extractor/radlive.py
import json from .common import InfoExtractor from ..utils import ( ExtractorError, format_field, traverse_obj, try_get, unified_timestamp, ) class RadLiveIE(InfoExtractor): IE_NAME = 'radlive' _VALID_URL = r'https?://(?:www\.)?rad\.live/content/(?P<content_type>feature|episode)/(?P<id>[a-f0-9-]+)' _TESTS = [{ 'url': 'https://rad.live/content/feature/dc5acfbc-761b-4bec-9564-df999905116a', 'md5': '6219d5d31d52de87d21c9cf5b7cb27ff', 'info_dict': { 'id': 'dc5acfbc-761b-4bec-9564-df999905116a', 'ext': 'mp4', 'title': 'Deathpact - Digital Mirage 2 [Full Set]', 'language': 'en', 'thumbnail': 'https://static.12core.net/cb65ae077a079c68380e38f387fbc438.png', 'description': '', 'release_timestamp': 1600185600.0, 'channel': 'Proximity', 'channel_id': '9ce6dd01-70a4-4d59-afb6-d01f807cd009', 'channel_url': 'https://rad.live/content/channel/9ce6dd01-70a4-4d59-afb6-d01f807cd009', }, }, { 'url': 'https://rad.live/content/episode/bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf', 'md5': '40b2175f347592125d93e9a344080125', 'info_dict': { 'id': 'bbcf66ec-0d02-4ca0-8dc0-4213eb2429bf', 'ext': 'mp4', 'title': 'E01: Bad Jokes 1', 'language': 'en', 'thumbnail': 'https://lsp.littlstar.com/channels/WHISTLE/BAD_JOKES/SEASON_1/BAD_JOKES_101/poster.jpg', 'description': 'Bad Jokes - Champions, Adam Pally, Super Troopers, Team Edge and 2Hype', 'episode': 'E01: Bad Jokes 1', 'episode_number': 1, 'episode_id': '336', }, }] def _real_extract(self, url): content_type, video_id = self._match_valid_url(url).groups() webpage = self._download_webpage(url, video_id) content_info = json.loads(self._search_regex( r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>', webpage, 'video info', group='json'))['props']['pageProps']['initialContentData'] video_info = content_info[content_type] if not video_info: raise ExtractorError('Unable to extract video info, make sure the URL is valid') formats =
self._extract_m3u8_formats(video_info['assets']['videos'][0]['url'], video_id) data = video_info.get('structured_data', {}) release_date = unified_timestamp(traverse_obj(data, ('releasedEvent', 'startDate'))) channel = next(iter(content_info.get('channels', [])), {}) channel_id = channel.get('lrn', '').split(':')[-1] or None result = { 'id': video_id, 'title': video_info['title'], 'formats': formats, 'language': traverse_obj(data, ('potentialAction', 'target', 'inLanguage')), 'thumbnail': traverse_obj(data, ('image', 'contentUrl')), 'description': data.get('description'), 'release_timestamp': release_date, 'channel': channel.get('name'), 'channel_id': channel_id, 'channel_url': format_field(channel_id, None, 'https://rad.live/content/channel/%s'), } if content_type == 'episode': result.update({ # TODO: Get season number when downloading single episode 'episode': video_info.get('title'), 'episode_number': video_info.get('number'), 'episode_id': video_info.get('id'), }) return result class RadLiveSeasonIE(RadLiveIE): # XXX: Do not subclass from concrete IE IE_NAME = 'radlive:season' _VALID_URL = r'https?://(?:www\.)?rad\.live/content/season/(?P<id>[a-f0-9-]+)' _TESTS = [{ 'url': 'https://rad.live/content/season/08a290f7-c9ef-4e22-9105-c255995a2e75', 'md5': '40b2175f347592125d93e9a344080125', 'info_dict': { 'id': '08a290f7-c9ef-4e22-9105-c255995a2e75', 'title': 'Bad Jokes - Season 1', }, 'playlist_mincount': 5, }] @classmethod def suitable(cls, url): return False if RadLiveIE.suitable(url) else super().suitable(url) def _real_extract(self, url): season_id = self._match_id(url) webpage = self._download_webpage(url, season_id) content_info = json.loads(self._search_regex( r'<script[^>]*type=([\'"])application/json\1[^>]*>(?P<json>{.+?})</script>', webpage, 'video info', group='json'))['props']['pageProps']['initialContentData'] video_info = content_info['season'] entries = [{ '_type': 'url_transparent', 'id': episode['structured_data']['url'].split('/')[-1], 'url': episode['structured_data']['url'], 'series': try_get(content_info, lambda x: x['series']['title']), 'season': video_info['title'], 'season_number': video_info.get('number'), 'season_id': video_info.get('id'), 'ie_key': RadLiveIE.ie_key(), } for episode in video_info['episodes']] return self.playlist_result(entries, season_id, video_info.get('title')) class RadLiveChannelIE(RadLiveIE): # XXX: Do not subclass from concrete IE IE_NAME = 'radlive:channel' _VALID_URL = r'https?://(?:www\.)?rad\.live/content/channel/(?P<id>[a-f0-9-]+)' _TESTS = [{ 'url': 'https://rad.live/content/channel/5c4d8df4-6fa0-413c-81e3-873479b49274', 'md5': '625156a08b7f2b0b849f234e664457ac', 'info_dict': { 'id': '5c4d8df4-6fa0-413c-81e3-873479b49274', 'title': 'Whistle Sports', }, 'playlist_mincount': 7, }] _QUERY = ''' query WebChannelListing ($lrn: ID!) 
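    # $lrn is a Littlstar resource name; _real_extract() below builds it as
    # lrn:12core:media:content:channel:<channel_id>. (GraphQL '#' comments are
    # ignored by the server, so this note travels safely inside the query.)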
{ channel (id:$lrn) { name features { structured_data } } }''' @classmethod def suitable(cls, url): return False if RadLiveIE.suitable(url) else super().suitable(url) def _real_extract(self, url): channel_id = self._match_id(url) graphql = self._download_json( 'https://content.mhq.12core.net/graphql', channel_id, headers={'Content-Type': 'application/json'}, data=json.dumps({ 'query': self._QUERY, 'variables': {'lrn': f'lrn:12core:media:content:channel:{channel_id}'}, }).encode()) data = traverse_obj(graphql, ('data', 'channel')) if not data: raise ExtractorError('Unable to extract video info, make sure the URL is valid') entries = [{ '_type': 'url_transparent', 'url': feature['structured_data']['url'], 'ie_key': RadLiveIE.ie_key(), } for feature in data['features']] return self.playlist_result(entries, channel_id, data.get('name'))
yt-dlp-2024.09.27/yt_dlp/extractor/rai.py
import re from .common import InfoExtractor from ..networking import HEADRequest from ..utils import ( ExtractorError, GeoRestrictedError, clean_html, determine_ext, filter_dict, int_or_none, join_nonempty, parse_duration, remove_start, strip_or_none, traverse_obj, try_get, unified_strdate, unified_timestamp, update_url_query, urljoin, xpath_text, ) class RaiBaseIE(InfoExtractor): _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' _GEO_COUNTRIES = ['IT'] _GEO_BYPASS = False def _fix_m3u8_formats(self, media_url, video_id): fmts = self._extract_m3u8_formats( media_url, video_id, 'mp4', m3u8_id='hls', fatal=False) # Fix malformed m3u8 manifests by setting audio-only/video-only formats for f in fmts: if not f.get('acodec'): f['acodec'] = 'mp4a' if not f.get('vcodec'): f['vcodec'] = 'avc1' man_url = f['url'] if re.search(r'chunklist(?:_b\d+)*_ao[_.]', man_url): # audio only f['vcodec'] = 'none' elif re.search(r'chunklist(?:_b\d+)*_vo[_.]', man_url): # video only f['acodec'] = 'none' else: # video+audio if f['acodec'] == 'none': f['acodec'] = 'mp4a' if f['vcodec'] == 'none': f['vcodec'] = 'avc1' return fmts def _extract_relinker_info(self, relinker_url, video_id, audio_only=False): def fix_cdata(s): # remove \r\n\t before and after <![CDATA[ ]]> to avoid # polluted text with xpath_text s = re.sub(r'(\]\]>)[\r\n\t]+(</)', '\\1\\2', s) return re.sub(r'(>)[\r\n\t]+(<!\[CDATA\[)', '\\1\\2', s) if not re.match(r'https?://', relinker_url): return {'formats': [{'url': relinker_url}]} # set User-Agent to generic 'Rai' to avoid quality filtering from # the media server and get the maximum qualities available relinker = self._download_xml( relinker_url, video_id, note='Downloading XML metadata', transform_source=fix_cdata, query={'output': 64}, headers={**self.geo_verification_headers(), 'User-Agent': 'Rai'}) if xpath_text(relinker, './license_url', default='{}') != '{}':
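            # A relinker response whose <license_url> is anything other than the
            # literal '{}' carries a DRM licence endpoint; such streams cannot be
            # decrypted, so the video is flagged as DRM-protected right away.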
self.report_drm(video_id) is_live = xpath_text(relinker, './is_live', default='N') == 'Y' duration = parse_duration(xpath_text(relinker, './duration', default=None)) media_url = xpath_text(relinker, './url[@type="content"]', default=None) if not media_url: self.raise_no_formats('The relinker returned no media url') # geo flag is a bit unreliable and not properly set all the time geoprotection = xpath_text(relinker, './geoprotection', default='N') == 'Y' ext = determine_ext(media_url) formats = [] if ext == 'mp3': formats.append({ 'url': media_url, 'vcodec': 'none', 'acodec': 'mp3', 'format_id': 'https-mp3', }) elif ext == 'm3u8' or 'format=m3u8' in media_url: formats.extend(self._fix_m3u8_formats(media_url, video_id)) elif ext == 'f4m': # very likely no longer needed. Cannot find any url that uses it. manifest_url = update_url_query( media_url.replace('manifest#live_hds.f4m', 'manifest.f4m'), {'hdcore': '3.7.0', 'plugin': 'aasp-3.7.0.39.44'}) formats.extend(self._extract_f4m_formats( manifest_url, video_id, f4m_id='hds', fatal=False)) elif ext == 'mp4': bitrate = int_or_none(xpath_text(relinker, './bitrate')) formats.append({ 'url': media_url, 'tbr': bitrate if bitrate > 0 else None, 'format_id': join_nonempty('https', bitrate, delim='-'), }) else: raise ExtractorError('Unrecognized media file found') if (not formats and geoprotection is True) or '/video_no_available.mp4' in media_url: self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) if not audio_only and not is_live: formats.extend(self._create_http_urls(media_url, relinker_url, formats, video_id)) return filter_dict({ 'is_live': is_live, 'duration': duration, 'formats': formats, }) def _create_http_urls(self, manifest_url, relinker_url, fmts, video_id): _MANIFEST_REG = r'/(?P<id>\w+)(?:_(?P<quality>[\d\,]+))?(?:\.mp4)?(?:\.csmil)?/playlist\.m3u8' _MP4_TMPL = '%s&overrideUserAgentRule=mp4-%s' _QUALITY = { # tbr: w, h 250: [352, 198], 400: [512, 288], 600: [512, 288], 700: [512, 288], 800: [700, 394], 1200: [736, 414], 1500: [920, 518], 1800: [1024, 576], 2400: [1280, 720], 3200: [1440, 810], 3600: [1440, 810], 5000: [1920, 1080], 10000: [1920, 1080], } def percentage(number, target, pc=20, roof=125): """check if the target is in the range of number +/- percent""" if not number or number < 0: return False return abs(target - number) < min(float(number) * float(pc) / 100.0, roof) def get_format_info(tbr): import math br = int_or_none(tbr) if len(fmts) == 1 and not br: br = fmts[0].get('tbr') if br and br > 300: tbr = math.floor(br / 100) * 100 else: tbr = 250 # try extracting info from available m3u8 formats format_copy = [None, None] for f in fmts: if f.get('tbr'): if percentage(tbr, f['tbr']): format_copy[0] = f.copy() if [f.get('width'), f.get('height')] == _QUALITY.get(tbr): format_copy[1] = f.copy() format_copy[1]['tbr'] = tbr # prefer format with similar bitrate because there might be # multiple video with the same resolution but different bitrate format_copy = format_copy[0] or format_copy[1] or {} return { 'format_id': f'https-{tbr}', 'width': format_copy.get('width'), 'height': format_copy.get('height'), 'tbr': format_copy.get('tbr') or tbr, 'vcodec': format_copy.get('vcodec') or 'avc1', 'acodec': format_copy.get('acodec') or 'mp4a', 'fps': format_copy.get('fps') or 25, } if format_copy else { 'format_id': f'https-{tbr}', 'width': _QUALITY[tbr][0], 'height': _QUALITY[tbr][1], 'tbr': tbr, 'vcodec': 'avc1', 'acodec': 'mp4a', 'fps': 25, } # Check if MP4 download is available try: self._request_webpage( 
HEADRequest(_MP4_TMPL % (relinker_url, '*')), video_id, 'Checking MP4 availability') except ExtractorError as e: self.to_screen(f'{video_id}: MP4 direct download is not available: {e.cause}') return [] # filter out single-stream formats fmts = [f for f in fmts if f.get('vcodec') != 'none' and f.get('acodec') != 'none'] mobj = re.search(_MANIFEST_REG, manifest_url) if not mobj: return [] available_qualities = mobj.group('quality').split(',') if mobj.group('quality') else ['*'] formats = [] for q in filter(None, available_qualities): self.write_debug(f'Creating https format for quality {q}') formats.append({ 'url': _MP4_TMPL % (relinker_url, q), 'protocol': 'https', 'ext': 'mp4', **get_format_info(q), }) return formats @staticmethod def _get_thumbnails_list(thumbs, url): return [{ 'url': urljoin(url, thumb_url), } for thumb_url in (thumbs or {}).values() if thumb_url] @staticmethod def _extract_subtitles(url, video_data): STL_EXT = 'stl' SRT_EXT = 'srt' subtitles = {} subtitles_array = video_data.get('subtitlesArray') or video_data.get('subtitleList') or [] for k in ('subtitles', 'subtitlesUrl'): subtitles_array.append({'url': video_data.get(k)}) for subtitle in subtitles_array: sub_url = subtitle.get('url') if sub_url and isinstance(sub_url, str): sub_lang = subtitle.get('language') or 'it' sub_url = urljoin(url, sub_url) sub_ext = determine_ext(sub_url, SRT_EXT) subtitles.setdefault(sub_lang, []).append({ 'ext': sub_ext, 'url': sub_url, }) if STL_EXT == sub_ext: subtitles[sub_lang].append({ 'ext': SRT_EXT, 'url': sub_url[:-len(STL_EXT)] + SRT_EXT, }) return subtitles class RaiPlayIE(RaiBaseIE): _VALID_URL = rf'(?P<base>https?://(?:www\.)?raiplay\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)' _TESTS = [{ 'url': 'https://www.raiplay.it/video/2014/04/Report-del-07042014-cb27157f-9dd0-4aee-b788-b1f67643a391.html', 'md5': '8970abf8caf8aef4696e7b1f2adfc696', 'info_dict': { 'id': 'cb27157f-9dd0-4aee-b788-b1f67643a391', 'ext': 'mp4', 'title': 'Report del 07/04/2014', 'alt_title': 'St 2013/14 - Report - Espresso nel caffè - 07/04/2014', 'description': 'md5:d730c168a58f4bb35600fc2f881ec04e', 'thumbnail': r're:^https?://www\.raiplay\.it/.+\.jpg', 'uploader': 'Rai 3', 'creator': 'Rai 3', 'duration': 6160, 'series': 'Report', 'season': '2013/14', 'subtitles': {'it': 'count:4'}, 'release_year': 2024, 'episode': 'Espresso nel caffè - 07/04/2014', 'timestamp': 1396919880, 'upload_date': '20140408', 'formats': 'count:4', }, 'params': {'skip_download': True}, }, { # 1080p 'url': 'https://www.raiplay.it/video/2021/11/Blanca-S1E1-Senza-occhi-b1255a4a-8e72-4a2f-b9f3-fc1308e00736.html', 'md5': 'aeda7243115380b2dd5e881fd42d949a', 'info_dict': { 'id': 'b1255a4a-8e72-4a2f-b9f3-fc1308e00736', 'ext': 'mp4', 'title': 'Blanca - S1E1 - Senza occhi', 'alt_title': 'St 1 Ep 1 - Blanca - Senza occhi', 'description': 'md5:75f95d5c030ec8bac263b1212322e28c', 'thumbnail': r're:^https://www\.raiplay\.it/dl/img/.+\.jpg', 'uploader': 'Rai Premium', 'creator': 'Rai Fiction', 'duration': 6493, 'series': 'Blanca', 'season': 'Season 1', 'episode_number': 1, 'release_year': 2021, 'season_number': 1, 'episode': 'Senza occhi', 'timestamp': 1637318940, 'upload_date': '20211119', 'formats': 'count:7', }, 'params': {'skip_download': True}, 'expected_warnings': ['Video not available. 
Likely due to geo-restriction.'], }, { # 1500 quality 'url': 'https://www.raiplay.it/video/2012/09/S1E11---Tutto-cio-che-luccica-0cab3323-732e-45d6-8e86-7704acab6598.html', 'md5': 'a634d20e8ab2d43724c273563f6bf87a', 'info_dict': { 'id': '0cab3323-732e-45d6-8e86-7704acab6598', 'ext': 'mp4', 'title': 'Mia and Me - S1E11 - Tutto ciò che luccica', 'alt_title': 'St 1 Ep 11 - Mia and Me - Tutto ciò che luccica', 'description': 'md5:4969e594184b1920c4c1f2b704da9dea', 'thumbnail': r're:^https?://.*\.jpg$', 'uploader': 'Rai Gulp', 'series': 'Mia and Me', 'season': 'Season 1', 'episode_number': 11, 'release_year': 2015, 'season_number': 1, 'episode': 'Tutto ciò che luccica', 'timestamp': 1348495020, 'upload_date': '20120924', }, }, { 'url': 'http://www.raiplay.it/video/2016/11/gazebotraindesi-efebe701-969c-4593-92f3-285f0d1ce750.html?', 'only_matching': True, }, { # subtitles at 'subtitlesArray' key (see #27698) 'url': 'https://www.raiplay.it/video/2020/12/Report---04-01-2021-2e90f1de-8eee-4de4-ac0e-78d21db5b600.html', 'only_matching': True, }, { # DRM protected 'url': 'https://www.raiplay.it/video/2021/06/Lo-straordinario-mondo-di-Zoey-S2E1-Lo-straordinario-ritorno-di-Zoey-3ba992de-2332-41ad-9214-73e32ab209f4.html', 'only_matching': True, }] def _real_extract(self, url): base, video_id = self._match_valid_url(url).groups() media = self._download_json( f'{base}.json', video_id, 'Downloading video JSON') if not self.get_param('allow_unplayable_formats'): if traverse_obj(media, (('program_info', None), 'rights_management', 'rights', 'drm')): self.report_drm(video_id) video = media['video'] relinker_info = self._extract_relinker_info(video['content_url'], video_id) date_published = join_nonempty( media.get('date_published'), media.get('time_published'), delim=' ') season = media.get('season') alt_title = join_nonempty(media.get('subtitle'), media.get('toptitle'), delim=' - ') return { 'id': remove_start(media.get('id'), 'ContentItem-') or video_id, 'display_id': video_id, 'title': media.get('name'), 'alt_title': strip_or_none(alt_title or None), 'description': media.get('description'), 'uploader': strip_or_none( traverse_obj(media, ('program_info', 'channel')) or media.get('channel') or None), 'creator': strip_or_none( traverse_obj(media, ('program_info', 'editor')) or media.get('editor') or None), 'duration': parse_duration(video.get('duration')), 'timestamp': unified_timestamp(date_published), 'thumbnails': self._get_thumbnails_list(media.get('images'), url), 'series': traverse_obj(media, ('program_info', 'name')), 'season_number': int_or_none(season), 'season': season if (season and not season.isdigit()) else None, 'episode': media.get('episode_title'), 'episode_number': int_or_none(media.get('episode')), 'subtitles': self._extract_subtitles(url, video), 'release_year': int_or_none(traverse_obj(media, ('track_info', 'edit_year'))), **relinker_info, } class RaiPlayLiveIE(RaiPlayIE): # XXX: Do not subclass from concrete IE _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/dirette/(?P<id>[^/?#&]+))' _TESTS = [{ 'url': 'http://www.raiplay.it/dirette/rainews24', 'info_dict': { 'id': 'd784ad40-e0ae-4a69-aa76-37519d238a9c', 'display_id': 'rainews24', 'ext': 'mp4', 'title': 're:^Diretta di Rai News 24 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'description': 'md5:4d00bcf6dc98b27c6ec480de329d1497', 'uploader': 'Rai News 24', 'creator': 'Rai News 24', 'is_live': True, 'live_status': 'is_live', 'upload_date': '20090502', 'timestamp': 1241276220, 'formats': 'count:3', }, 'params': {'skip_download': 
True}, }] class RaiPlayPlaylistIE(InfoExtractor): _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplay\.it/programmi/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?' _TESTS = [{ # entire series episodes + extras... 'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/', 'info_dict': { 'id': 'nondirloalmiocapo', 'title': 'Non dirlo al mio capo', 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b', }, 'playlist_mincount': 30, }, { # single season 'url': 'https://www.raiplay.it/programmi/nondirloalmiocapo/episodi/stagione-2/', 'info_dict': { 'id': 'nondirloalmiocapo', 'title': 'Non dirlo al mio capo - Stagione 2', 'description': 'md5:98ab6b98f7f44c2843fd7d6f045f153b', }, 'playlist_count': 12, }] def _real_extract(self, url): base, playlist_id, extra_id = self._match_valid_url(url).groups() program = self._download_json( f'{base}.json', playlist_id, 'Downloading program JSON') if extra_id: extra_id = extra_id.upper().rstrip('/') playlist_title = program.get('name') entries = [] for b in (program.get('blocks') or []): for s in (b.get('sets') or []): if extra_id: if extra_id != join_nonempty( b.get('name'), s.get('name'), delim='/').replace(' ', '-').upper(): continue playlist_title = join_nonempty(playlist_title, s.get('name'), delim=' - ') s_id = s.get('id') if not s_id: continue medias = self._download_json( f'{base}/{s_id}.json', s_id, 'Downloading content set JSON', fatal=False) if not medias: continue for m in (medias.get('items') or []): path_id = m.get('path_id') if not path_id: continue video_url = urljoin(url, path_id) entries.append(self.url_result( video_url, ie=RaiPlayIE.ie_key(), video_id=RaiPlayIE._match_id(video_url))) return self.playlist_result( entries, playlist_id, playlist_title, try_get(program, lambda x: x['program_info']['description'])) class RaiPlaySoundIE(RaiBaseIE): _VALID_URL = rf'(?P<base>https?://(?:www\.)?raiplaysound\.it/.+?-(?P<id>{RaiBaseIE._UUID_RE}))\.(?:html|json)' _TESTS = [{ 'url': 'https://www.raiplaysound.it/audio/2021/12/IL-RUGGITO-DEL-CONIGLIO-1ebae2a7-7cdb-42bb-842e-fe0d193e9707.html', 'md5': '8970abf8caf8aef4696e7b1f2adfc696', 'info_dict': { 'id': '1ebae2a7-7cdb-42bb-842e-fe0d193e9707', 'ext': 'mp3', 'title': 'Il Ruggito del Coniglio del 10/12/2021', 'alt_title': 'md5:0e6476cd57858bb0f3fcc835d305b455', 'description': 'md5:2a17d2107e59a4a8faa0e18334139ee2', 'thumbnail': r're:^https?://.+\.jpg$', 'uploader': 'rai radio 2', 'duration': 5685, 'series': 'Il Ruggito del Coniglio', 'episode': 'Il Ruggito del Coniglio del 10/12/2021', 'creator': 'rai radio 2', 'timestamp': 1638346620, 'upload_date': '20211201', }, 'params': {'skip_download': True}, }] def _real_extract(self, url): base, audio_id = self._match_valid_url(url).group('base', 'id') media = self._download_json(f'{base}.json', audio_id, 'Downloading audio JSON') uid = try_get(media, lambda x: remove_start(remove_start(x['uniquename'], 'ContentItem-'), 'Page-')) info = {} formats = [] relinkers = set(traverse_obj(media, (('downloadable_audio', 'audio', ('live', 'cards', 0, 'audio')), 'url'))) for r in relinkers: info = self._extract_relinker_info(r, audio_id, True) formats.extend(info.get('formats')) date_published = try_get(media, (lambda x: f'{x["create_date"]} {x.get("create_time") or ""}', lambda x: x['live']['create_date'])) podcast_info = traverse_obj(media, 'podcast_info', ('live', 'cards', 0)) or {} return { **info, 'id': uid or audio_id, 'display_id': audio_id, 'title': traverse_obj(media, 'title', 'episode_title'), 'alt_title': traverse_obj(media, ('track_info', 'media_name'), 
expected_type=strip_or_none), 'description': media.get('description'), 'uploader': traverse_obj(media, ('track_info', 'channel'), expected_type=strip_or_none), 'creator': traverse_obj(media, ('track_info', 'editor'), expected_type=strip_or_none), 'timestamp': unified_timestamp(date_published), 'thumbnails': self._get_thumbnails_list(podcast_info.get('images'), url), 'series': podcast_info.get('title'), 'season_number': int_or_none(media.get('season')), 'episode': media.get('episode_title'), 'episode_number': int_or_none(media.get('episode')), 'formats': formats, } class RaiPlaySoundLiveIE(RaiPlaySoundIE): # XXX: Do not subclass from concrete IE _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?P<id>[^/?#&]+)$)' _TESTS = [{ 'url': 'https://www.raiplaysound.it/radio2', 'info_dict': { 'id': 'b00a50e6-f404-4af6-8f8c-ff3b9af73a44', 'display_id': 'radio2', 'ext': 'mp4', 'title': r're:Rai Radio 2 \d+-\d+-\d+ \d+:\d+', 'thumbnail': r're:^https://www\.raiplaysound\.it/dl/img/.+\.png', 'uploader': 'rai radio 2', 'series': 'Rai Radio 2', 'creator': 'raiplaysound', 'is_live': True, 'live_status': 'is_live', }, 'params': {'skip_download': True}, }] class RaiPlaySoundPlaylistIE(InfoExtractor): _VALID_URL = r'(?P<base>https?://(?:www\.)?raiplaysound\.it/(?:programmi|playlist|audiolibri)/(?P<id>[^/?#&]+))(?:/(?P<extra_id>[^?#&]+))?' _TESTS = [{ # entire show 'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio', 'info_dict': { 'id': 'ilruggitodelconiglio', 'title': 'Il Ruggito del Coniglio', 'description': 'md5:62a627b3a2d0635d08fa8b6e0a04f27e', }, 'playlist_mincount': 65, }, { # single season 'url': 'https://www.raiplaysound.it/programmi/ilruggitodelconiglio/puntate/prima-stagione-1995', 'info_dict': { 'id': 'ilruggitodelconiglio_puntate_prima-stagione-1995', 'title': 'Prima Stagione 1995', }, 'playlist_count': 1, }] def _real_extract(self, url): base, playlist_id, extra_id = self._match_valid_url(url).group('base', 'id', 'extra_id') url = f'{base}.json' program = self._download_json(url, playlist_id, 'Downloading program JSON') if extra_id: extra_id = extra_id.rstrip('/') playlist_id += '_' + extra_id.replace('/', '_') path = next(c['path_id'] for c in program.get('filters') or [] if extra_id in c.get('weblink')) program = self._download_json( urljoin('https://www.raiplaysound.it', path), playlist_id, 'Downloading program secondary JSON') entries = [ self.url_result(urljoin(base, c['path_id']), ie=RaiPlaySoundIE.ie_key()) for c in traverse_obj(program, 'cards', ('block', 'cards')) or [] if c.get('path_id')] return self.playlist_result(entries, playlist_id, program.get('title'), traverse_obj(program, ('podcast_info', 'description'))) class RaiIE(RaiBaseIE): _VALID_URL = rf'https?://[^/]+\.(?:rai\.(?:it|tv))/.+?-(?P<id>{RaiBaseIE._UUID_RE})(?:-.+?)?\.html' _TESTS = [{ 'url': 'https://www.raisport.rai.it/dl/raiSport/media/rassegna-stampa-04a9f4bd-b563-40cf-82a6-aad3529cb4a9.html', 'info_dict': { 'id': '04a9f4bd-b563-40cf-82a6-aad3529cb4a9', 'ext': 'mp4', 'title': 'TG PRIMO TEMPO', 'thumbnail': r're:^https?://.*\.jpg', 'duration': 1758, 'upload_date': '20140612', }, 'params': {'skip_download': True}, 'expected_warnings': ['Video not available. 
Likely due to geo-restriction.'], }, { 'url': 'https://www.rai.it/dl/RaiTV/programmi/media/ContentItem-efb17665-691c-45d5-a60c-5301333cbb0c.html', 'info_dict': { 'id': 'efb17665-691c-45d5-a60c-5301333cbb0c', 'ext': 'mp4', 'title': 'TG1 ore 20:00 del 03/11/2016', 'description': 'TG1 edizione integrale ore 20:00 del giorno 03/11/2016', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 2214, 'upload_date': '20161103', }, 'params': {'skip_download': True}, }, { # Direct MMS: Media URL no longer works. 'url': 'http://www.rai.it/dl/RaiTV/programmi/media/ContentItem-b63a4089-ac28-48cf-bca5-9f5b5bc46df5.html', 'only_matching': True, }] def _real_extract(self, url): content_id = self._match_id(url) media = self._download_json( f'https://www.rai.tv/dl/RaiTV/programmi/media/ContentItem-{content_id}.html?json', content_id, 'Downloading video JSON', fatal=False, expected_status=404) if media is None: return None if 'Audio' in media['type']: relinker_info = { 'formats': [{ 'format_id': join_nonempty('https', media.get('formatoAudio'), delim='-'), 'url': media['audioUrl'], 'ext': media.get('formatoAudio'), 'vcodec': 'none', 'acodec': media.get('formatoAudio'), }], } elif 'Video' in media['type']: relinker_info = self._extract_relinker_info(media['mediaUri'], content_id) else: raise ExtractorError('not a media file') thumbnails = self._get_thumbnails_list( {image_type: media.get(image_type) for image_type in ( 'image', 'image_medium', 'image_300')}, url) return { 'id': content_id, 'title': strip_or_none(media.get('name') or media.get('title')), 'description': strip_or_none(media.get('desc')) or None, 'thumbnails': thumbnails, 'uploader': strip_or_none(media.get('author')) or None, 'upload_date': unified_strdate(media.get('date')), 'duration': parse_duration(media.get('length')), 'subtitles': self._extract_subtitles(url, media), **relinker_info, } class RaiNewsIE(RaiBaseIE): _VALID_URL = rf'https?://(www\.)?rainews\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html' _EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)'] _TESTS = [{ # new rainews player (#3911) 'url': 'https://www.rainews.it/video/2024/02/membri-della-croce-rossa-evacuano-gli-abitanti-di-un-villaggio-nella-regione-ucraina-di-kharkiv-il-filmato-dallucraina--31e8017c-845c-43f5-9c48-245b43c3a079.html', 'info_dict': { 'id': '31e8017c-845c-43f5-9c48-245b43c3a079', 'ext': 'mp4', 'title': 'md5:1e81364b09de4a149042bac3c7d36f0b', 'duration': 196, 'upload_date': '20240225', 'uploader': 'rainews', 'formats': 'count:2', }, 'params': {'skip_download': True}, }, { # old content with fallback method to extract media urls 'url': 'https://www.rainews.it/dl/rainews/media/Weekend-al-cinema-da-Hollywood-arriva-il-thriller-di-Tate-Taylor-La-ragazza-del-treno-1632c009-c843-4836-bb65-80c33084a64b.html', 'info_dict': { 'id': '1632c009-c843-4836-bb65-80c33084a64b', 'ext': 'mp4', 'title': 'Weekend al cinema, da Hollywood arriva il thriller di Tate Taylor "La ragazza del treno"', 'description': 'I film in uscita questa settimana.', 'thumbnail': r're:^https?://.*\.png$', 'duration': 833, 'upload_date': '20161103', 'formats': 'count:8', }, 'params': {'skip_download': True}, 'expected_warnings': ['unable to extract player_data'], }, { # iframe + drm 'url': 'https://www.rainews.it/iframe/video/2022/07/euro2022-europei-calcio-femminile-italia-belgio-gol-0-1-video-4de06a69-de75-4e32-a657-02f0885f8118.html', 'only_matching': True, }] _PLAYER_TAG = 'news' def _real_extract(self, url): video_id = self._match_id(url) 
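        # The current rainews frontend embeds the player config as JSON inside a
        # custom element, roughly <rainews-player data='{...}'>; the tag prefix
        # comes from _PLAYER_TAG, so the RaiCulturaIE subclass below matches
        # <raicultura-player> instead.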
webpage = self._download_webpage(url, video_id) player_data = self._search_json( rf'<rai{self._PLAYER_TAG}-player\s*data=\'', webpage, 'player_data', video_id, transform_source=clean_html, default={}) track_info = player_data.get('track_info') relinker_url = traverse_obj(player_data, 'mediapolis', 'content_url') if not relinker_url: # fallback on old implementation for some old content try: return RaiIE._real_extract(self, url) except GeoRestrictedError: raise except ExtractorError as e: raise ExtractorError('Relinker URL not found', cause=e) relinker_info = self._extract_relinker_info(urljoin(url, relinker_url), video_id) return { 'id': video_id, 'title': player_data.get('title') or track_info.get('title') or self._og_search_title(webpage), 'upload_date': unified_strdate(track_info.get('date')), 'uploader': strip_or_none(track_info.get('editor') or None), **relinker_info, } class RaiCulturaIE(RaiNewsIE): # XXX: Do not subclass from concrete IE _VALID_URL = rf'https?://(www\.)?raicultura\.it/(?!articoli)[^?#]+-(?P<id>{RaiBaseIE._UUID_RE})(?:-[^/?#]+)?\.html' _EMBED_REGEX = [rf'<iframe[^>]+data-src="(?P<url>/iframe/[^?#]+?{RaiBaseIE._UUID_RE}\.html)'] _TESTS = [{ 'url': 'https://www.raicultura.it/letteratura/articoli/2018/12/Alberto-Asor-Rosa-Letteratura-e-potere-05ba8775-82b5-45c5-a89d-dd955fbde1fb.html', 'info_dict': { 'id': '05ba8775-82b5-45c5-a89d-dd955fbde1fb', 'ext': 'mp4', 'title': 'Alberto Asor Rosa: Letteratura e potere', 'duration': 1756, 'upload_date': '20181206', 'uploader': 'raicultura', 'formats': 'count:2', }, 'params': {'skip_download': True}, }] _PLAYER_TAG = 'cultura' class RaiSudtirolIE(RaiBaseIE): _VALID_URL = r'https?://raisudtirol\.rai\.it/.+media=(?P<id>\w+)' _TESTS = [{ # mp4 file 'url': 'https://raisudtirol.rai.it/la/index.php?media=Ptv1619729460', 'info_dict': { 'id': 'Ptv1619729460', 'ext': 'mp4', 'title': 'Euro: trasmisciun d\'economia - 29-04-2021 20:51', 'series': 'Euro: trasmisciun d\'economia', 'upload_date': '20210429', 'thumbnail': r're:https://raisudtirol\.rai\.it/img/.+\.jpg', 'uploader': 'raisudtirol', 'formats': 'count:1', }, 'params': {'skip_download': True}, }, { # m3u manifest 'url': 'https://raisudtirol.rai.it/it/kidsplayer.php?lang=it&media=GUGGUG_P1.smil', 'info_dict': { 'id': 'GUGGUG_P1', 'ext': 'mp4', 'title': 'GUGGUG! 
La Prospettiva - Die Perspektive', 'uploader': 'raisudtirol', 'formats': 'count:6', }, 'params': {'skip_download': True}, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) video_date = self._html_search_regex( r'<span class="med_data">(.+?)</span>', webpage, 'video_date', default=None) video_title = self._html_search_regex([ r'<span class="med_title">(.+?)</span>', r'title: \'(.+?)\','], webpage, 'video_title', default=None) video_url = self._html_search_regex([ r'sources:\s*\[\{file:\s*"(.+?)"\}\]', r'<source\s+src="(.+?)"\s+type="application/x-mpegURL"'], webpage, 'video_url', default=None) ext = determine_ext(video_url) if ext == 'm3u8': formats = self._extract_m3u8_formats(video_url, video_id) elif ext == 'mp4': formats = [{ 'format_id': 'https-mp4', 'url': self._proto_relative_url(video_url), 'width': 1024, 'height': 576, 'fps': 25, 'vcodec': 'avc1', 'acodec': 'mp4a', }] else: formats = [] self.raise_no_formats(f'Unrecognized media file: {video_url}') return { 'id': video_id, 'title': join_nonempty(video_title, video_date, delim=' - '), 'series': video_title if video_date else None, 'upload_date': unified_strdate(video_date), 'thumbnail': urljoin('https://raisudtirol.rai.it/', self._html_search_regex( r'image: \'(.+?)\'', webpage, 'video_thumb', default=None)), 'uploader': 'raisudtirol', 'formats': formats, }
yt-dlp-2024.09.27/yt_dlp/extractor/raywenderlich.py
import re from .common import InfoExtractor from .vimeo import VimeoIE from ..utils import ( ExtractorError, int_or_none, merge_dicts, try_get, unescapeHTML, unified_timestamp, urljoin, ) class RayWenderlichIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: videos\.raywenderlich\.com/courses| (?:www\.)?raywenderlich\.com )/ (?P<course_id>[^/]+)/lessons/(?P<id>\d+) ''' _TESTS = [{ 'url': 'https://www.raywenderlich.com/3530-testing-in-ios/lessons/1', 'info_dict': { 'id': '248377018', 'ext': 'mp4', 'title': 'Introduction', 'description': 'md5:804d031b3efa9fcb49777d512d74f722', 'timestamp': 1513906277, 'upload_date': '20171222', 'duration': 133, 'uploader': 'Ray Wenderlich', 'uploader_id': 'user3304672', }, 'params': { 'noplaylist': True, 'skip_download': True, }, 'add_ie': [VimeoIE.ie_key()], 'expected_warnings': ['HTTP Error 403: Forbidden'], }, { 'url': 'https://videos.raywenderlich.com/courses/105-testing-in-ios/lessons/1', 'only_matching': True, }] @staticmethod def _extract_video_id(data, lesson_id): if not data: return groups = try_get(data, lambda x: x['groups'], list) or [] if not groups: return for group in groups: if not isinstance(group, dict): continue contents = try_get(group, lambda x: x['contents'], list) or [] for content in contents: if not isinstance(content, dict): continue ordinal = int_or_none(content.get('ordinal')) if ordinal != lesson_id: continue video_id = content.get('identifier') if video_id: return str(video_id) def _real_extract(self, url): mobj
= self._match_valid_url(url) course_id, lesson_id = mobj.group('course_id', 'id') display_id = f'{course_id}/{lesson_id}' webpage = self._download_webpage(url, display_id) thumbnail = self._og_search_thumbnail( webpage, default=None) or self._html_search_meta( 'twitter:image', webpage, 'thumbnail') if '>Subscribe to unlock' in webpage: raise ExtractorError( 'This content is only available for subscribers', expected=True) info = { 'thumbnail': thumbnail, } vimeo_id = self._search_regex( r'data-vimeo-id=["\'](\d+)', webpage, 'vimeo id', default=None) if not vimeo_id: data = self._parse_json( self._search_regex( r'data-collection=(["\'])(?P<data>{.+?})\1', webpage, 'data collection', default='{}', group='data'), display_id, transform_source=unescapeHTML, fatal=False) video_id = self._extract_video_id( data, lesson_id) or self._search_regex( r'/videos/(\d+)/', thumbnail, 'video id') headers = { 'Referer': url, 'X-Requested-With': 'XMLHttpRequest', } csrf_token = self._html_search_meta( 'csrf-token', webpage, 'csrf token', default=None) if csrf_token: headers['X-CSRF-Token'] = csrf_token video = self._download_json( f'https://videos.raywenderlich.com/api/v1/videos/{video_id}.json', display_id, headers=headers)['video'] vimeo_id = video['clips'][0]['provider_id'] info.update({ '_type': 'url_transparent', 'title': video.get('name'), 'description': video.get('description') or video.get( 'meta_description'), 'duration': int_or_none(video.get('duration')), 'timestamp': unified_timestamp(video.get('created_at')), }) return merge_dicts(info, self.url_result( VimeoIE._smuggle_referrer( f'https://player.vimeo.com/video/{vimeo_id}', url), ie=VimeoIE.ie_key(), video_id=vimeo_id)) class RayWenderlichCourseIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: videos\.raywenderlich\.com/courses| (?:www\.)?raywenderlich\.com )/ (?P<id>[^/]+) ''' _TEST = { 'url': 'https://www.raywenderlich.com/3530-testing-in-ios', 'info_dict': { 'title': 'Testing in iOS', 'id': '3530-testing-in-ios', }, 'params': { 'noplaylist': False, }, 'playlist_count': 29, } @classmethod def suitable(cls, url): return False if RayWenderlichIE.suitable(url) else super().suitable(url) def _real_extract(self, url): course_id = self._match_id(url) webpage = self._download_webpage(url, course_id) entries = [] lesson_urls = set() for lesson_url in re.findall( rf'<a[^>]+\bhref=["\'](/{course_id}/lessons/\d+)', webpage): if lesson_url in lesson_urls: continue lesson_urls.add(lesson_url) entries.append(self.url_result( urljoin(url, lesson_url), ie=RayWenderlichIE.ie_key())) title = self._og_search_title( webpage, default=None) or self._html_search_meta( 'twitter:title', webpage, 'title', default=None) return self.playlist_result(entries, course_id, title)
yt-dlp-2024.09.27/yt_dlp/extractor/rbgtum.py
import re from .common import InfoExtractor from ..utils import ExtractorError, parse_qs, remove_start, traverse_obj class RbgTumIE(InfoExtractor): _VALID_URL =
r'https?://(?:live\.rbg\.tum\.de|tum\.live)/w/(?P<id>[^?#]+)' _TESTS = [{ # Combined view 'url': 'https://live.rbg.tum.de/w/cpp/22128', 'md5': '53a5e7b3e07128e33bbf36687fe1c08f', 'info_dict': { 'id': 'cpp/22128', 'ext': 'mp4', 'title': 'Lecture: October 18. 2022', 'series': 'Concepts of C++ programming (IN2377)', }, }, { # Presentation only 'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES', 'md5': '36c584272179f3e56b0db5d880639cba', 'info_dict': { 'id': 'I2DL/12349/PRES', 'ext': 'mp4', 'title': 'Lecture 3: Introduction to Neural Networks', 'series': 'Introduction to Deep Learning (IN2346)', }, }, { # Camera only 'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM', 'md5': 'e04189d92ff2f56aedf5cede65d37aad', 'info_dict': { 'id': 'fvv-info/16130/CAM', 'ext': 'mp4', 'title': 'Fachschaftsvollversammlung', 'series': 'Fachschaftsvollversammlung Informatik', }, }, { 'url': 'https://tum.live/w/linalginfo/27102', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) m3u8 = self._html_search_regex(r'"(https://[^"]+\.m3u8[^"]*)', webpage, 'm3u8') lecture_title = self._html_search_regex(r'<h1[^>]*>([^<]+)</h1>', webpage, 'title', fatal=False) lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ') formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') return { 'id': video_id, 'title': lecture_title, 'series': lecture_series_title, 'formats': formats, } class RbgTumCourseIE(InfoExtractor): _VALID_URL = r'https?://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/old/course/(?P<id>(?P<year>\d+)/(?P<term>\w+)/(?P<slug>[^/?#]+))' _TESTS = [{ 'url': 'https://live.rbg.tum.de/old/course/2022/S/fpv', 'info_dict': { 'title': 'Funktionale Programmierung und Verifikation (IN0003)', 'id': '2022/S/fpv', }, 'params': { 'noplaylist': False, }, 'playlist_count': 13, }, { 'url': 'https://live.rbg.tum.de/old/course/2022/W/set', 'info_dict': { 'title': 'SET FSMPIC', 'id': '2022/W/set', }, 'params': { 'noplaylist': False, }, 'playlist_count': 6, }, { 'url': 'https://tum.live/old/course/2023/S/linalginfo', 'only_matching': True, }] def _real_extract(self, url): course_id, hostname, year, term, slug = self._match_valid_url(url).group('id', 'hostname', 'year', 'term', 'slug') meta = self._download_json( f'https://{hostname}/api/courses/{slug}/', course_id, fatal=False, query={'year': year, 'term': term}) or {} lecture_series_title = meta.get('Name') lectures = [self.url_result(f'https://{hostname}/w/{slug}/{stream_id}', RbgTumIE) for stream_id in traverse_obj(meta, ('Streams', ..., 'ID'))] if not lectures: webpage = self._download_webpage(url, course_id) lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ') lectures = [self.url_result(f'https://{hostname}{lecture_path}', RbgTumIE) for lecture_path in re.findall(r'href="(/w/[^/"]+/[^/"]+)"', webpage)] return self.playlist_result(lectures, course_id, lecture_series_title) class RbgTumNewCourseIE(InfoExtractor): _VALID_URL = r'https?://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/\?' 
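    # Matches the newer query-string course URLs, e.g.
    #   https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3
    # _real_extract() rebuilds the equivalent /old/course/<year>/<term>/<slug>
    # URL and hands it off to RbgTumCourseIE.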
_TESTS = [{ 'url': 'https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3', 'info_dict': { 'title': 'Funktionale Programmierung und Verifikation (IN0003)', 'id': '2022/S/fpv', }, 'params': { 'noplaylist': False, }, 'playlist_count': 13, }, { 'url': 'https://live.rbg.tum.de/?year=2022&term=W&slug=set&view=3', 'info_dict': { 'title': 'SET FSMPIC', 'id': '2022/W/set', }, 'params': { 'noplaylist': False, }, 'playlist_count': 6, }, { 'url': 'https://tum.live/?year=2023&term=S&slug=linalginfo&view=3', 'only_matching': True, }] def _real_extract(self, url): query = parse_qs(url) errors = [key for key in ('year', 'term', 'slug') if not query.get(key)] if errors: raise ExtractorError(f'Input URL is missing query parameters: {", ".join(errors)}') year, term, slug = query['year'][0], query['term'][0], query['slug'][0] hostname = self._match_valid_url(url).group('hostname') return self.url_result(f'https://{hostname}/old/course/{year}/{term}/{slug}', RbgTumCourseIE)
yt-dlp-2024.09.27/yt_dlp/extractor/rcs.py
import re from .common import InfoExtractor from ..networking import HEADRequest from ..utils import ( ExtractorError, base_url, clean_html, extract_attributes, get_element_html_by_class, get_element_html_by_id, int_or_none, js_to_json, mimetype2ext, sanitize_url, traverse_obj, try_call, url_basename, urljoin, ) class RCSBaseIE(InfoExtractor): # based on VideoPlayerLoader.prototype.getVideoSrc # and VideoPlayerLoader.prototype.transformSrc from # https://js2.corriereobjects.it/includes2013/LIBS/js/corriere_video.sjs _UUID_RE = r'[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}' _RCS_ID_RE = r'[\w-]+-\d{10}' _MIGRATION_MAP = { 'videoamica-vh.akamaihd': 'amica', 'media2-amica-it.akamaized': 'amica', 'corrierevam-vh.akamaihd': 'corriere', 'media2vam-corriere-it.akamaized': 'corriere', 'cormezzogiorno-vh.akamaihd': 'corrieredelmezzogiorno', 'media2vam-mezzogiorno-corriere-it.akamaized': 'corrieredelmezzogiorno', 'corveneto-vh.akamaihd': 'corrieredelveneto', 'media2vam-veneto-corriere-it.akamaized': 'corrieredelveneto', 'corbologna-vh.akamaihd': 'corrieredibologna', 'media2vam-bologna-corriere-it.akamaized': 'corrieredibologna', 'corfiorentino-vh.akamaihd': 'corrierefiorentino', 'media2vam-fiorentino-corriere-it.akamaized': 'corrierefiorentino', 'corinnovazione-vh.akamaihd': 'corriereinnovazione', 'media2-gazzanet-gazzetta-it.akamaized': 'gazzanet', 'videogazzanet-vh.akamaihd': 'gazzanet', 'videogazzaworld-vh.akamaihd': 'gazzaworld', 'gazzettavam-vh.akamaihd': 'gazzetta', 'media2vam-gazzetta-it.akamaized': 'gazzetta', 'videoiodonna-vh.akamaihd': 'iodonna', 'media2-leitv-it.akamaized': 'leitv', 'videoleitv-vh.akamaihd': 'leitv', 'videoliving-vh.akamaihd': 'living', 'media2-living-corriere-it.akamaized': 'living', 'media2-oggi-it.akamaized': 'oggi',
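        # The keys in this mapping are legacy Akamai hostnames (without the
        # trailing .net); _get_video_src() below uses it to rewrite e.g.
        # //videooggi-vh.akamaihd.net/i/<path> into
        # https://vod.rcsobjects.it/hls/oggi/<path>.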
'videooggi-vh.akamaihd': 'oggi', 'media2-quimamme-it.akamaized': 'quimamme', 'quimamme-vh.akamaihd': 'quimamme', 'videorunning-vh.akamaihd': 'running', 'media2-style-corriere-it.akamaized': 'style', 'style-vh.akamaihd': 'style', 'videostyle-vh.akamaihd': 'style', 'media2-stylepiccoli-it.akamaized': 'stylepiccoli', 'stylepiccoli-vh.akamaihd': 'stylepiccoli', 'doveviaggi-vh.akamaihd': 'viaggi', 'media2-doveviaggi-it.akamaized': 'viaggi', 'media2-vivimilano-corriere-it.akamaized': 'vivimilano', 'vivimilano-vh.akamaihd': 'vivimilano', 'media2-youreporter-it.akamaized': 'youreporter', } def _get_video_src(self, video): for source in traverse_obj(video, ( 'mediaProfile', 'mediaFile', lambda _, v: v.get('mimeType'))): url = source['value'] for s, r in ( ('media2vam.corriere.it.edgesuite.net', 'media2vam-corriere-it.akamaized.net'), ('media.youreporter.it.edgesuite.net', 'media-youreporter-it.akamaized.net'), ('corrierepmd.corriere.it.edgesuite.net', 'corrierepmd-corriere-it.akamaized.net'), ('media2vam-corriere-it.akamaized.net/fcs.quotidiani/vr/videos/', 'video.corriere.it/vr360/videos/'), ('http://', 'https://'), ): url = url.replace(s, r) type_ = mimetype2ext(source['mimeType']) if type_ == 'm3u8' and '-vh.akamaihd' in url: # still needed for some old content: see _TESTS #3 matches = re.search(r'(?:https?:)?//(?P<host>[\w\.\-]+)\.net/i(?P<path>.+)$', url) if matches: url = f'https://vod.rcsobjects.it/hls/{self._MIGRATION_MAP[matches.group("host")]}{matches.group("path")}' if traverse_obj(video, ('mediaProfile', 'geoblocking')) or ( type_ == 'm3u8' and 'fcs.quotidiani_!' in url): url = url.replace('vod.rcsobjects', 'vod-it.rcsobjects') if type_ == 'm3u8' and 'vod' in url: url = url.replace('.csmil', '.urlset') if type_ == 'mp3': url = url.replace('media2vam-corriere-it.akamaized.net', 'vod.rcsobjects.it/corriere') yield { 'type': type_, 'url': url, 'bitrate': source.get('bitrate'), } def _create_http_formats(self, m3u8_formats, video_id): for f in m3u8_formats: if f['vcodec'] == 'none': continue http_url = re.sub(r'(https?://[^/]+)/hls/([^?#]+?\.mp4).+', r'\g<1>/\g<2>', f['url']) if http_url == f['url']: continue http_f = f.copy() del http_f['manifest_url'] format_id = try_call(lambda: http_f['format_id'].replace('hls-', 'https-')) urlh = self._request_webpage(HEADRequest(http_url), video_id, fatal=False, note=f'Check filesize for {format_id}') if not urlh: continue http_f.update({ 'format_id': format_id, 'url': http_url, 'protocol': 'https', 'filesize_approx': int_or_none(urlh.headers.get('Content-Length', None)), }) yield http_f def _create_formats(self, sources, video_id): for source in sources: if source['type'] == 'm3u8': m3u8_formats = self._extract_m3u8_formats( source['url'], video_id, 'mp4', m3u8_id='hls', fatal=False) yield from m3u8_formats yield from self._create_http_formats(m3u8_formats, video_id) elif source['type'] == 'mp3': yield { 'format_id': 'https-mp3', 'ext': 'mp3', 'acodec': 'mp3', 'vcodec': 'none', 'abr': source.get('bitrate'), 'url': source['url'], } def _real_extract(self, url): cdn, video_id = self._match_valid_url(url).group('cdn', 'id') display_id, video_data = None, None if re.match(self._UUID_RE, video_id) or re.match(self._RCS_ID_RE, video_id): url = f'https://video.{cdn}/video-json/{video_id}' else: webpage = self._download_webpage(url, video_id) data_config = get_element_html_by_id('divVideoPlayer', webpage) or get_element_html_by_class('divVideoPlayer', webpage) if data_config: data_config = self._parse_json( extract_attributes(data_config).get('data-config'), 
video_id, fatal=False) or {} if data_config.get('newspaper'): cdn = f'{data_config["newspaper"]}.it' display_id, video_id = video_id, data_config.get('uuid') or video_id url = f'https://video.{cdn}/video-json/{video_id}' else: json_url = self._search_regex( r'''(?x)url\s*=\s*(["']) (?P<url> (?:https?:)?//video\.rcs\.it /fragment-includes/video-includes/[^"']+?\.json )\1;''', webpage, video_id, group='url', default=None) if json_url: video_data = self._download_json(sanitize_url(json_url, scheme='https'), video_id) display_id, video_id = video_id, video_data.get('id') or video_id if not video_data: webpage = self._download_webpage(url, video_id) video_data = self._search_json( '##start-video##', webpage, 'video data', video_id, default=None, end_pattern='##end-video##', transform_source=js_to_json) if not video_data: # try search for iframes emb = RCSEmbedsIE._extract_url(webpage) if emb: return { '_type': 'url_transparent', 'url': emb, 'ie_key': RCSEmbedsIE.ie_key(), } if not video_data: raise ExtractorError('Video data not found in the page') return { 'id': video_id, 'display_id': display_id, 'title': video_data.get('title'), 'description': (clean_html(video_data.get('description')) or clean_html(video_data.get('htmlDescription')) or self._html_search_meta('description', webpage)), 'uploader': video_data.get('provider') or cdn, 'formats': list(self._create_formats(self._get_video_src(video_data), video_id)), } class RCSEmbedsIE(RCSBaseIE): _VALID_URL = r'''(?x) https?://(?P<vid>video)\. (?P<cdn> (?: rcs| (?:corriere\w+\.)?corriere| (?:gazzanet\.)?gazzetta )\.it) /video-embed/(?P<id>[^/=&\?]+?)(?:$|\?)''' _EMBED_REGEX = [r'''(?x) (?: data-frame-src=| <iframe[^\n]+src= ) (["']) (?P<url>(?:https?:)?//video\. (?: rcs| (?:corriere\w+\.)?corriere| (?:gazzanet\.)?gazzetta ) \.it/video-embed/.+?) \1'''] _TESTS = [{ 'url': 'https://video.rcs.it/video-embed/iodonna-0001585037', 'md5': '0faca97df525032bb9847f690bc3720c', 'info_dict': { 'id': 'iodonna-0001585037', 'ext': 'mp4', 'title': 'Sky Arte racconta Madonna nella serie "Artist to icon"', 'description': 'md5:65b09633df9ffee57f48b39e34c9e067', 'uploader': 'rcs.it', }, }, { 'url': 'https://video.gazzanet.gazzetta.it/video-embed/gazzanet-mo05-0000260789', 'only_matching': True, }, { 'url': 'https://video.gazzetta.it/video-embed/49612410-00ca-11eb-bcd8-30d4253e0140', 'only_matching': True, }] _WEBPAGE_TESTS = [{ 'url': 'https://www.iodonna.it/video-iodonna/personaggi-video/monica-bellucci-piu-del-lavoro-oggi-per-me-sono-importanti-lamicizia-e-la-famiglia/', 'info_dict': { 'id': 'iodonna-0002033648', 'ext': 'mp4', 'title': 'Monica Bellucci: «Più del lavoro, oggi per me sono importanti l\'amicizia e la famiglia»', 'description': 'md5:daea6d9837351e56b1ab615c06bebac1', 'uploader': 'rcs.it', }, }] @staticmethod def _sanitize_url(url): url = sanitize_url(url, scheme='https') return urljoin(base_url(url), url_basename(url)) @classmethod def _extract_embed_urls(cls, url, webpage): return map(cls._sanitize_url, super()._extract_embed_urls(url, webpage)) class RCSIE(RCSBaseIE): _VALID_URL = r'''(?x)https?://(?P<vid>video|viaggi)\. (?P<cdn> (?: corrieredelmezzogiorno\. |corrieredelveneto\. |corrieredibologna\. |corrierefiorentino\. 
)?corriere\.it |(?:gazzanet\.)?gazzetta\.it) /(?!video-embed/)[^?#]+?/(?P<id>[^/\?]+)(?=\?|/$|$)''' _TESTS = [{ # json iframe directly from id 'url': 'https://video.corriere.it/sport/formula-1/vettel-guida-ferrari-sf90-mugello-suo-fianco-c-elecrerc-bendato-video-esilarante/b727632a-f9d0-11ea-91b0-38d50a849abb', 'md5': '14946840dec46ecfddf66ba4eea7d2b2', 'info_dict': { 'id': 'b727632a-f9d0-11ea-91b0-38d50a849abb', 'ext': 'mp4', 'title': 'Vettel guida la Ferrari SF90 al Mugello e al suo fianco c\'è Leclerc (bendato): il video è esilarante', 'description': 'md5:3915ce5ebb3d2571deb69a5eb85ac9b5', 'uploader': 'Corriere Tv', }, }, { # search for video id inside the page 'url': 'https://viaggi.corriere.it/video/norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen/', 'md5': 'f22a92d9e666e80f2fffbf2825359c81', 'info_dict': { 'id': '5b7cd134-e2c1-11ea-89b3-b56dd0df2aa2', 'display_id': 'norvegia-il-nuovo-ponte-spettacolare-sopra-la-cascata-di-voringsfossen', 'ext': 'mp4', 'title': 'La nuova spettacolare attrazione in Norvegia: il ponte sopra Vøringsfossen', 'description': 'md5:18b35a291f6746c0c8dacd16e5f5f4f8', 'uploader': 'DOVE Viaggi', }, }, { # only audio format https://github.com/yt-dlp/yt-dlp/issues/5683 'url': 'https://video.corriere.it/cronaca/audio-telefonata-il-papa-becciu-santita-lettera-che-mi-ha-inviato-condanna/b94c0d20-70c2-11ed-9572-e4b947a0ebd2', 'md5': 'aaffb08d02f2ce4292a4654694c78150', 'info_dict': { 'id': 'b94c0d20-70c2-11ed-9572-e4b947a0ebd2', 'ext': 'mp3', 'title': 'L\'audio della telefonata tra il Papa e Becciu: «Santità, la lettera che mi ha inviato è una condanna»', 'description': 'md5:c0ddb61bd94a8d4e0d4bb9cda50a689b', 'uploader': 'Corriere Tv', 'formats': [{'format_id': 'https-mp3', 'ext': 'mp3'}], }, }, { # old content still needs cdn migration 'url': 'https://viaggi.corriere.it/video/milano-varallo-sesia-sul-treno-a-vapore/', 'md5': '2dfdce7af249654ad27eeba03fe1e08d', 'info_dict': { 'id': 'd8f6c8d0-f7d7-11e8-bfca-f74cf4634191', 'display_id': 'milano-varallo-sesia-sul-treno-a-vapore', 'ext': 'mp4', 'title': 'Milano-Varallo Sesia sul treno a vapore', 'description': 'md5:6348f47aac230397fe341a74f7678d53', 'uploader': 'DOVE Viaggi', }, }, { 'url': 'https://video.corriere.it/video-360/metro-copenaghen-tutta-italiana/a248a7f0-e2db-11e9-9830-af2de6b1f945', 'only_matching': True, }] class RCSVariousIE(RCSBaseIE): _VALID_URL = r'''(?x)https?://www\. 
(?P<cdn> leitv\.it| youreporter\.it| amica\.it )/(?:[^/]+/)?(?P<id>[^/]+?)(?:$|\?|/)''' _TESTS = [{ 'url': 'https://www.leitv.it/benessere/mal-di-testa/', 'md5': '3b7a683d105a7313ec7513b014443631', 'info_dict': { 'id': 'leitv-0000125151', 'display_id': 'mal-di-testa', 'ext': 'mp4', 'title': 'Cervicalgia e mal di testa, il video con i suggerimenti dell\'esperto', 'description': 'md5:ae21418f34cee0b8d02a487f55bcabb5', 'uploader': 'leitv.it', }, }, { 'url': 'https://www.youreporter.it/fiume-sesia-3-ottobre-2020/', 'md5': '3989b6d603482611a2abd2f32b79f739', 'info_dict': { 'id': 'youreporter-0000332574', 'display_id': 'fiume-sesia-3-ottobre-2020', 'ext': 'mp4', 'title': 'Fiume Sesia 3 ottobre 2020', 'description': 'md5:0070eef1cc884d13c970a4125063de55', 'uploader': 'youreporter.it', }, }, { 'url': 'https://www.amica.it/video-post/saint-omer-al-cinema-il-film-leone-dargento-che-ribalta-gli-stereotipi/', 'md5': '187cce524dfd0343c95646c047375fc4', 'info_dict': { 'id': 'amica-0001225365', 'display_id': 'saint-omer-al-cinema-il-film-leone-dargento-che-ribalta-gli-stereotipi', 'ext': 'mp4', 'title': '"Saint Omer": al cinema il film Leone d\'argento che ribalta gli stereotipi', 'description': 'md5:b1c8869c2dcfd6073a2a311ba0008aa8', 'uploader': 'rcs.it', }, }]
yt-dlp-2024.09.27/yt_dlp/extractor/rcti.py
import json import random import time from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, dict_get, strip_or_none, traverse_obj, try_get, ) class RCTIPlusBaseIE(InfoExtractor): def _real_initialize(self): self._AUTH_KEY = self._download_json( 'https://api.rctiplus.com/api/v1/visitor?platform=web', # platform can be web, mweb, android, ios None, 'Fetching authorization key')['data']['access_token'] def _call_api(self, url, video_id, note=None): json = self._download_json( url, video_id, note=note, headers={'Authorization': self._AUTH_KEY}) if json.get('status', {}).get('code', 0) != 0: raise ExtractorError(f'{self.IE_NAME} said: {json["status"]["message_client"]}', cause=json) return json.get('data'), json.get('meta') class RCTIPlusIE(RCTIPlusBaseIE): _VALID_URL = r'https?://www\.rctiplus\.com/(?:programs/\d+?/.*?/)?(?P<type>episode|clip|extra|live-event|missed-event)/(?P<id>\d+)/(?P<display_id>[^/?#&]+)' _TESTS = [{ 'url': 'https://www.rctiplus.com/programs/1259/kiko-untuk-lola/episode/22124/untuk-lola', 'md5': '56ed45affad45fa18d5592a1bc199997', 'info_dict': { 'id': 'v_e22124', 'title': 'Untuk Lola', 'display_id': 'untuk-lola', 'description': 'md5:2b809075c0b1e071e228ad6d13e41deb', 'ext': 'mp4', 'duration': 1400, 'timestamp': 1615978800, 'upload_date': '20210317', 'series': 'Kiko : Untuk Lola', 'season_number': 1, 'episode_number': 1, 'channel': 'RCTI', }, 'params': { 'fixup': 'never', }, }, { # Clip; Series title doesn't appear on metadata JSON 'url': 'https://www.rctiplus.com/programs/316/cahaya-terindah/clip/3921/make-a-wish', 'md5':
'd179b2ff356f0e91a53bcc6a4d8504f0', 'info_dict': { 'id': 'v_c3921', 'title': 'Make A Wish', 'display_id': 'make-a-wish', 'description': 'Make A Wish', 'ext': 'mp4', 'duration': 288, 'timestamp': 1571652600, 'upload_date': '20191021', 'series': 'Cahaya Terindah', 'channel': 'RCTI', }, 'params': { 'fixup': 'never', }, }, { # Extra 'url': 'https://www.rctiplus.com/programs/616/inews-malam/extra/9438/diungkapkan-melalui-surat-terbuka-ceo-ruangguru-belva-devara-mundur-dari-staf-khusus-presiden', 'md5': 'c48106afdbce609749f5e0c007d9278a', 'info_dict': { 'id': 'v_ex9438', 'title': 'md5:2ede828c0f8bde249e0912be150314ca', 'display_id': 'md5:62b8d4e9ff096db527a1ad797e8a9933', 'description': 'md5:2ede828c0f8bde249e0912be150314ca', 'ext': 'mp4', 'duration': 93, 'timestamp': 1587561540, 'upload_date': '20200422', 'series': 'iNews Malam', 'channel': 'INews', }, }, { # Missed event/replay 'url': 'https://www.rctiplus.com/missed-event/2507/mou-signing-ceremony-27-juli-2021-1400-wib', 'md5': '649c5f27250faed1452ca8b91e06922d', 'info_dict': { 'id': 'v_pe2507', 'title': 'MOU Signing Ceremony | 27 Juli 2021 | 14.00 WIB', 'display_id': 'mou-signing-ceremony-27-juli-2021-1400-wib', 'ext': 'mp4', 'timestamp': 1627142400, 'upload_date': '20210724', 'was_live': True, 'release_timestamp': 1627369200, }, 'params': { 'fixup': 'never', }, }, { # Live event; Cloudfront CDN 'url': 'https://www.rctiplus.com/live-event/2530/dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib', 'info_dict': { 'id': 'v_le2530', 'title': 'Dai Muda : Charging Imun dengan Iman | 4 Agustus 2021 | 16.00 WIB', 'display_id': 'dai-muda-charging-imun-dengan-iman-4-agustus-2021-1600-wib', 'ext': 'mp4', 'timestamp': 1627898400, 'upload_date': '20210802', 'release_timestamp': 1628067600, }, 'params': { 'skip_download': True, }, 'skip': 'This live event has ended.', }, { # TV; live_at is null 'url': 'https://www.rctiplus.com/live-event/1/rcti', 'info_dict': { 'id': 'v_lt1', 'title': 'RCTI', 'display_id': 'rcti', 'ext': 'mp4', 'timestamp': 1546344000, 'upload_date': '20190101', 'is_live': True, }, 'params': { 'skip_download': True, }, }] _CONVIVA_JSON_TEMPLATE = { 't': 'CwsSessionHb', 'cid': 'ff84ae928c3b33064b76dec08f12500465e59a6f', 'clid': '0', 'sid': 0, 'seq': 0, 'caps': 0, 'sf': 7, 'sdk': True, } def _real_extract(self, url): match = self._match_valid_url(url).groupdict() video_type, video_id, display_id = match['type'], match['id'], match['display_id'] url_api_version = 'v2' if video_type == 'missed-event' else 'v1' appier_id = '23984824_' + str(random.randint(0, 10000000000)) # Based on the webpage's uuidRandom generator video_json = self._call_api( f'https://api.rctiplus.com/api/{url_api_version}/{video_type}/{video_id}/url?appierid={appier_id}', display_id, 'Downloading video URL JSON')[0] video_url = video_json['url'] is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['live_at']) if is_upcoming is None: is_upcoming = try_get(video_json, lambda x: x['current_date'] < x['start_date']) if is_upcoming: self.raise_no_formats( 'This event will start at {}.'.format(video_json['live_label']) if video_json.get('live_label') else 'This event has not started yet.', expected=True) if 'akamaized' in video_url: # For some videos hosted on Akamai's CDN (possibly AES-encrypted ones?), a session needs to at least be made via Conviva's API conviva_json_data = { **self._CONVIVA_JSON_TEMPLATE, 'url': video_url, 'sst': int(time.time()), } conviva_json_res = self._download_json( 
'https://ff84ae928c3b33064b76dec08f12500465e59a6f.cws.conviva.com/0/wsg', display_id, 'Creating Conviva session', 'Failed to create Conviva session', fatal=False, data=json.dumps(conviva_json_data).encode()) if conviva_json_res and conviva_json_res.get('err') != 'ok': self.report_warning('Conviva said: {}'.format(str(conviva_json_res.get('err')))) video_meta, meta_paths = self._call_api( f'https://api.rctiplus.com/api/v1/{video_type}/{video_id}', display_id, 'Downloading video metadata') thumbnails, image_path = [], meta_paths.get('image_path', 'https://rstatic.akamaized.net/media/') if video_meta.get('portrait_image'): thumbnails.append({ 'id': 'portrait_image', 'url': '{}{}{}'.format(image_path, 2000, video_meta['portrait_image']), # 2000px seems to be the highest resolution that can be given }) if video_meta.get('landscape_image'): thumbnails.append({ 'id': 'landscape_image', 'url': '{}{}{}'.format(image_path, 2000, video_meta['landscape_image']), }) try: formats = self._extract_m3u8_formats(video_url, display_id, 'mp4', headers={'Referer': 'https://www.rctiplus.com/'}) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 403: self.raise_geo_restricted(countries=['ID'], metadata_available=True) else: raise e for f in formats: if 'akamaized' in f['url'] or 'cloudfront' in f['url']: f.setdefault('http_headers', {})['Referer'] = 'https://www.rctiplus.com/' # Referer header is required for akamai/cloudfront CDNs return { 'id': video_meta.get('product_id') or video_json.get('product_id'), 'title': dict_get(video_meta, ('title', 'name')) or dict_get(video_json, ('content_name', 'assets_name')), 'display_id': display_id, 'description': video_meta.get('summary'), 'timestamp': video_meta.get('release_date') or video_json.get('start_date'), 'duration': video_meta.get('duration'), 'categories': [video_meta['genre']] if video_meta.get('genre') else None, 'average_rating': video_meta.get('star_rating'), 'series': video_meta.get('program_title') or video_json.get('program_title'), 'season_number': video_meta.get('season'), 'episode_number': video_meta.get('episode'), 'channel': video_json.get('tv_name'), 'channel_id': video_json.get('tv_id'), 'formats': formats, 'thumbnails': thumbnails, 'is_live': video_type == 'live-event' and not is_upcoming, 'was_live': video_type == 'missed-event', 'live_status': 'is_upcoming' if is_upcoming else None, 'release_timestamp': video_json.get('live_at'), } class RCTIPlusSeriesIE(RCTIPlusBaseIE): _VALID_URL = r'https?://www\.rctiplus\.com/programs/(?P<id>\d+)/(?P<display_id>[^/?#&]+)(?:/(?P<type>episodes|extras|clips))?' 
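# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the extractor): the RCTIPlusIE flow above
# reduces to three HTTP steps -- fetch an anonymous visitor token, send it as
# the Authorization header to the /url entitlement endpoint (with a random
# "appierid" mirroring the site's uuidRandom generator), and read the stream
# URL out of the JSON envelope's "data" key. The helper below redoes that with
# plain urllib; the endpoints and JSON shape are the ones used above, but the
# function name and the absence of error handling are assumptions, not a
# supported client.
import json
import random
import urllib.request


def _rcti_stream_url_sketch(video_type, video_id):  # hypothetical helper
    def get_json(url, token=None):
        headers = {'Authorization': token} if token else {}
        with urllib.request.urlopen(urllib.request.Request(url, headers=headers)) as resp:
            return json.loads(resp.read())

    token = get_json(
        'https://api.rctiplus.com/api/v1/visitor?platform=web')['data']['access_token']
    appier_id = '23984824_' + str(random.randint(0, 10000000000))
    api_version = 'v2' if video_type == 'missed-event' else 'v1'
    video_json = get_json(
        f'https://api.rctiplus.com/api/{api_version}/{video_type}/{video_id}/url'
        f'?appierid={appier_id}', token=token)
    return video_json['data']['url']
# ---------------------------------------------------------------------------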
_TESTS = [{ 'url': 'https://www.rctiplus.com/programs/829/putri-untuk-pangeran', 'playlist_mincount': 1019, 'info_dict': { 'id': '829', 'title': 'Putri Untuk Pangeran', 'description': 'md5:aca7b54d05bd95a67d4f4613cc1d622d', 'age_limit': 2, 'cast': ['Verrel Bramasta', 'Ranty Maria', 'Riza Syah', 'Ivan Fadilla', 'Nicole Parham', 'Dll', 'Aviv Elham'], 'display_id': 'putri-untuk-pangeran', 'tags': 'count:18', }, }, { # No episodes 'url': 'https://www.rctiplus.com/programs/615/inews-pagi', 'playlist_mincount': 388, 'info_dict': { 'id': '615', 'title': 'iNews Pagi', 'description': 'md5:f18ee3d4643cfb41c358e5a9b693ee04', 'age_limit': 2, 'tags': 'count:11', 'display_id': 'inews-pagi', }, }] _AGE_RATINGS = { # Based off https://id.wikipedia.org/wiki/Sistem_rating_konten_televisi with additional ratings 'S-SU': 2, 'SU': 2, 'P': 2, 'A': 7, 'R': 13, 'R-R/1': 17, # Labelled as 17+ despite being R 'D': 18, } @classmethod def suitable(cls, url): return False if RCTIPlusIE.suitable(url) else super().suitable(url) def _entries(self, url, display_id=None, note='Downloading entries JSON', metadata={}): total_pages = 0 try: total_pages = self._call_api( f'{url}&length=20&page=0', display_id, note)[1]['pagination']['total_page'] except ExtractorError as e: if 'not found' in str(e): return [] raise e if total_pages <= 0: return [] for page_num in range(1, total_pages + 1): episode_list = self._call_api( f'{url}&length=20&page={page_num}', display_id, f'{note} page {page_num}')[0] or [] for video_json in episode_list: yield { '_type': 'url', 'url': video_json['share_link'], 'ie_key': RCTIPlusIE.ie_key(), 'id': video_json.get('product_id'), 'title': video_json.get('title'), 'display_id': video_json.get('title_code').replace('_', '-'), 'description': video_json.get('summary'), 'timestamp': video_json.get('release_date'), 'duration': video_json.get('duration'), 'season_number': video_json.get('season'), 'episode_number': video_json.get('episode'), **metadata, } def _series_entries(self, series_id, display_id=None, video_type=None, metadata={}): if not video_type or video_type in 'episodes': try: seasons_list = self._call_api( f'https://api.rctiplus.com/api/v1/program/{series_id}/season', display_id, 'Downloading seasons list JSON')[0] except ExtractorError as e: if 'not found' not in str(e): raise seasons_list = [] for season in seasons_list: yield from self._entries( f'https://api.rctiplus.com/api/v2/program/{series_id}/episode?season={season["season"]}', display_id, f'Downloading season {season["season"]} episode entries', metadata) if not video_type or video_type in 'extras': yield from self._entries( f'https://api.rctiplus.com/api/v2/program/{series_id}/extra?content_id=0', display_id, 'Downloading extra entries', metadata) if not video_type or video_type in 'clips': yield from self._entries( f'https://api.rctiplus.com/api/v2/program/{series_id}/clip?content_id=0', display_id, 'Downloading clip entries', metadata) def _real_extract(self, url): series_id, display_id, video_type = self._match_valid_url(url).group('id', 'display_id', 'type') if video_type: self.report_warning( f'Only {video_type} will be downloaded. 
' f'To download everything from the series, remove "/{video_type}" from the URL') series_meta, meta_paths = self._call_api( f'https://api.rctiplus.com/api/v1/program/{series_id}/detail', display_id, 'Downloading series metadata') metadata = { 'age_limit': try_get(series_meta, lambda x: self._AGE_RATINGS[x['age_restriction'][0]['code']]), 'cast': traverse_obj(series_meta, (('starring', 'creator', 'writer'), ..., 'name'), expected_type=lambda x: strip_or_none(x) or None), 'tags': traverse_obj(series_meta, ('tag', ..., 'name'), expected_type=lambda x: strip_or_none(x) or None), } return self.playlist_result( self._series_entries(series_id, display_id, video_type, metadata), series_id, series_meta.get('title'), series_meta.get('summary'), display_id=display_id, **metadata) class RCTIPlusTVIE(RCTIPlusBaseIE): _VALID_URL = r'https?://www\.rctiplus\.com/((tv/(?P<tvname>\w+))|(?P<eventname>live-event|missed-event))' _TESTS = [{ 'url': 'https://www.rctiplus.com/tv/rcti', 'info_dict': { 'id': 'v_lt1', 'title': 'RCTI', 'ext': 'mp4', 'timestamp': 1546344000, 'upload_date': '20190101', }, 'params': { 'skip_download': True, }, }, { # Returned video will always change 'url': 'https://www.rctiplus.com/live-event', 'only_matching': True, }, { # Returned video will also always change 'url': 'https://www.rctiplus.com/missed-event', 'only_matching': True, }] @classmethod def suitable(cls, url): return False if RCTIPlusIE.suitable(url) else super().suitable(url) def _real_extract(self, url): match = self._match_valid_url(url).groupdict() tv_id = match.get('tvname') or match.get('eventname') webpage = self._download_webpage(url, tv_id) video_type, video_id = self._search_regex( r'url\s*:\s*["\']https://api\.rctiplus\.com/api/v./(?P<type>[^/]+)/(?P<id>\d+)/url', webpage, 'video link', group=('type', 'id')) return self.url_result(f'https://www.rctiplus.com/{video_type}/{video_id}/{tv_id}', 'RCTIPlus') �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/rds.py�����������������������������������������������������������0000664�0000000�0000000�00000005365�14675634471�0020034�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( js_to_json, parse_duration, parse_iso8601, ) class RDSIE(InfoExtractor): _WORKING = False IE_DESC = 'RDS.ca' _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+' _TESTS = [{ # has two 9c9media ContentPackages, the web player selects the first ContentPackage 'url': 'https://www.rds.ca/videos/Hockey/NationalHockeyLeague/teams/9/forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande-3.1377606', 'info_dict': { 'id': '2083309', 'display_id': 'forum-du-5-a-7-jesperi-kotkaniemi-de-retour-de-finlande', 'ext': 'flv', 'title': 'Forum du 5 à 7 : Kotkaniemi de retour de Finlande', 
'description': 'md5:83fa38ecc4a79b19e433433254077f25', 'timestamp': 1606129030, 'upload_date': '20201123', 'duration': 773.039, }, }, { 'url': 'http://www.rds.ca/vid%C3%A9os/un-voyage-positif-3.877934', 'only_matching': True, }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json) video_id = str(item['id']) title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta( 'title', webpage, 'title', fatal=True) description = self._og_search_description(webpage) or self._html_search_meta( 'description', webpage, 'description') thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex( [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"', r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'], webpage, 'thumbnail', fatal=False) timestamp = parse_iso8601(self._search_regex( r'<span[^>]+itemprop="uploadDate"[^>]+content="([^"]+)"', webpage, 'upload date', fatal=False)) duration = parse_duration(self._search_regex( r'<span[^>]+itemprop="duration"[^>]+content="([^"]+)"', webpage, 'duration', fatal=False)) age_limit = self._family_friendly_search(webpage) return { '_type': 'url_transparent', 'id': video_id, 'display_id': display_id, 'url': f'9c9media:rds_web:{video_id}', 'title': title, 'description': description, 'thumbnail': thumbnail, 'timestamp': timestamp, 'duration': duration, 'age_limit': age_limit, 'ie_key': 'NineCNineMedia', } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/redbee.py��������������������������������������������������������0000664�0000000�0000000�00000034425�14675634471�0020471�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import json import re import time import urllib.parse import uuid from .common import InfoExtractor from ..utils import ( ExtractorError, float_or_none, int_or_none, strip_or_none, traverse_obj, try_call, unified_timestamp, ) class RedBeeBaseIE(InfoExtractor): _DEVICE_ID = str(uuid.uuid4()) @property def _API_URL(self): """ Ref: https://apidocs.emp.ebsd.ericsson.net Subclasses must set _REDBEE_CUSTOMER, _REDBEE_BUSINESS_UNIT """ return f'https://exposure.api.redbee.live/v2/customer/{self._REDBEE_CUSTOMER}/businessunit/{self._REDBEE_BUSINESS_UNIT}' def _get_bearer_token(self, asset_id, jwt=None): request = { 'deviceId': self._DEVICE_ID, 'device': { 'deviceId': self._DEVICE_ID, 'name': 'Mozilla Firefox 102', 'type': 'WEB', }, } if jwt: request['jwt'] = jwt return self._download_json( f'{self._API_URL}/auth/{"gigyaLogin" if jwt else "anonymous"}', asset_id, data=json.dumps(request).encode(), headers={ 'Content-Type': 'application/json;charset=utf-8', })['sessionToken'] def _get_formats_and_subtitles(self, asset_id, **kwargs): bearer_token = self._get_bearer_token(asset_id, **kwargs) api_response = self._download_json( 
f'{self._API_URL}/entitlement/{asset_id}/play', asset_id, headers={ 'Authorization': f'Bearer {bearer_token}', 'Accept': 'application/json, text/plain, */*', }) formats, subtitles = [], {} for format_data in api_response['formats']: if not format_data.get('mediaLocator'): continue fmts, subs = [], {} if format_data.get('format') == 'DASH': fmts, subs = self._extract_mpd_formats_and_subtitles( format_data['mediaLocator'], asset_id, fatal=False) elif format_data.get('format') == 'SMOOTHSTREAMING': fmts, subs = self._extract_ism_formats_and_subtitles( format_data['mediaLocator'], asset_id, fatal=False) elif format_data.get('format') == 'HLS': fmts, subs = self._extract_m3u8_formats_and_subtitles( format_data['mediaLocator'], asset_id, fatal=False) if format_data.get('drm'): for f in fmts: f['has_drm'] = True formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) return formats, subtitles class ParliamentLiveUKIE(RedBeeBaseIE): IE_NAME = 'parliamentlive.tv' IE_DESC = 'UK parliament videos' _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' _REDBEE_CUSTOMER = 'UKParliament' _REDBEE_BUSINESS_UNIT = 'ParliamentLive' _TESTS = [{ 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b', 'info_dict': { 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b', 'ext': 'mp4', 'title': 'Home Affairs Committee', 'timestamp': 1395153872, 'upload_date': '20140318', 'thumbnail': r're:https?://[^?#]+c1e9d44d-fd6c-4263-b50f-97ed26cc998b[^/]*/thumbnail', }, }, { 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4', 'only_matching': True, }, { 'url': 'https://parliamentlive.tv/Event/Index/27cf25e4-e77b-42a3-93c5-c815cd6d7377', 'info_dict': { 'id': '27cf25e4-e77b-42a3-93c5-c815cd6d7377', 'ext': 'mp4', 'title': 'House of Commons', 'timestamp': 1658392447, 'upload_date': '20220721', 'thumbnail': r're:https?://[^?#]+27cf25e4-e77b-42a3-93c5-c815cd6d7377[^/]*/thumbnail', }, }] def _real_extract(self, url): video_id = self._match_id(url) formats, subtitles = self._get_formats_and_subtitles(video_id) video_info = self._download_json( f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id, fatal=False) return { 'id': video_id, 'formats': formats, 'subtitles': subtitles, 'title': traverse_obj(video_info, ('event', 'title')), 'thumbnail': traverse_obj(video_info, 'thumbnailUrl'), 'timestamp': traverse_obj( video_info, ('event', 'publishedStartTime'), expected_type=unified_timestamp), '_format_sort_fields': ('res', 'proto'), } class RTBFIE(RedBeeBaseIE): _WORKING = False _VALID_URL = r'''(?x) https?://(?:www\.)?rtbf\.be/ (?: video/[^?]+\?.*\bid=| ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=| auvio/[^/]+\?.*\b(?P<live>l)?id= )(?P<id>\d+)''' _NETRC_MACHINE = 'rtbf' _REDBEE_CUSTOMER = 'RTBF' _REDBEE_BUSINESS_UNIT = 'Auvio' _TESTS = [{ 'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274', 'md5': '8c876a1cceeb6cf31b476461ade72384', 'info_dict': { 'id': '1921274', 'ext': 'mp4', 'title': 'Les Diables au coeur (épisode 2)', 'description': '(du 25/04/2014)', 'duration': 3099.54, 'upload_date': '20140425', 'timestamp': 1398456300, }, 'skip': 'No longer available', }, { # geo restricted 'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442', 'only_matching': True, }, { 'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858', 'only_matching': True, }, { 'url': 
'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996', 'only_matching': True, }, { # Live 'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775', 'only_matching': True, }, { # Audio 'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811', 'only_matching': True, }, { # With Subtitle 'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588', 'only_matching': True, }, { 'url': 'https://www.rtbf.be/auvio/detail_investigation?id=2921926', 'md5': 'd5d11bb62169fef38d7ce7ac531e034f', 'info_dict': { 'id': '2921926', 'ext': 'mp4', 'title': 'Le handicap un confinement perpétuel - Maladie de Lyme', 'description': 'md5:dcbd5dcf6015488c9069b057c15ccc52', 'duration': 5258.8, 'upload_date': '20220727', 'timestamp': 1658934000, 'series': '#Investigation', 'thumbnail': r're:^https?://[^?&]+\.jpg$', }, }, { 'url': 'https://www.rtbf.be/auvio/detail_la-belgique-criminelle?id=2920492', 'md5': '054f9f143bc79c89647c35e5a7d35fa8', 'info_dict': { 'id': '2920492', 'ext': 'mp4', 'title': '04 - Le crime de la rue Royale', 'description': 'md5:0c3da1efab286df83f2ab3f8f96bd7a6', 'duration': 1574.6, 'upload_date': '20220723', 'timestamp': 1658596887, 'series': 'La Belgique criminelle - TV', 'thumbnail': r're:^https?://[^?&]+\.jpg$', }, }] _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be' _PROVIDERS = { 'YOUTUBE': 'Youtube', 'DAILYMOTION': 'Dailymotion', 'VIMEO': 'Vimeo', } _QUALITIES = [ ('mobile', 'SD'), ('web', 'MD'), ('high', 'HD'), ] _LOGIN_URL = 'https://login.rtbf.be/accounts.login' _GIGYA_API_KEY = '3_kWKuPgcdAybqnqxq_MvHVk0-6PN8Zk8pIIkJM_yXOu-qLPDDsGOtIDFfpGivtbeO' _LOGIN_COOKIE_ID = f'glt_{_GIGYA_API_KEY}' def _perform_login(self, username, password): if self._get_cookies(self._LOGIN_URL).get(self._LOGIN_COOKIE_ID): return self._set_cookie('.rtbf.be', 'gmid', 'gmid.ver4', secure=True, expire_time=time.time() + 3600) login_response = self._download_json( self._LOGIN_URL, None, data=urllib.parse.urlencode({ 'loginID': username, 'password': password, 'APIKey': self._GIGYA_API_KEY, 'targetEnv': 'jssdk', 'sessionExpiration': '-2', }).encode(), headers={ 'Content-Type': 'application/x-www-form-urlencoded', }) if login_response['statusCode'] != 200: raise ExtractorError('Login failed. 
Server message: {}'.format(login_response['errorMessage']), expected=True) self._set_cookie('.rtbf.be', self._LOGIN_COOKIE_ID, login_response['sessionInfo']['login_token'], secure=True, expire_time=time.time() + 3600) def _get_formats_and_subtitles(self, url, media_id): login_token = self._get_cookies(url).get(self._LOGIN_COOKIE_ID) if not login_token: self.raise_login_required() session_jwt = try_call(lambda: self._get_cookies(url)['rtbf_jwt'].value) or self._download_json( 'https://login.rtbf.be/accounts.getJWT', media_id, query={ 'login_token': login_token.value, 'APIKey': self._GIGYA_API_KEY, 'sdk': 'js_latest', 'authMode': 'cookie', 'pageURL': url, 'sdkBuild': '13273', 'format': 'json', })['id_token'] return super()._get_formats_and_subtitles(media_id, jwt=session_jwt) def _real_extract(self, url): live, media_id = self._match_valid_url(url).groups() embed_page = self._download_webpage( 'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'), media_id, query={'id': media_id}) media_data = self._html_search_regex(r'data-media="([^"]+)"', embed_page, 'media data', fatal=False) if not media_data: if re.search(r'<div[^>]+id="js-error-expired"[^>]+class="(?![^"]*hidden)', embed_page): raise ExtractorError('Livestream has ended.', expected=True) if re.search(r'<div[^>]+id="js-sso-connect"[^>]+class="(?![^"]*hidden)', embed_page): self.raise_login_required() raise ExtractorError('Could not find media data') data = self._parse_json(media_data, media_id) error = data.get('error') if error: raise ExtractorError(f'{self.IE_NAME} said: {error}', expected=True) provider = data.get('provider') if provider in self._PROVIDERS: return self.url_result(data['url'], self._PROVIDERS[provider]) title = traverse_obj(data, 'subtitle', 'title') is_live = data.get('isLive') height_re = r'-(\d+)p\.' formats, subtitles = [], {} # The old api still returns m3u8 and mpd manifest for livestreams, but these are 'fake' # since all they contain is a 20s video that is completely unrelated. # https://github.com/yt-dlp/yt-dlp/issues/4656#issuecomment-1214461092 m3u8_url = None if data.get('isLive') else traverse_obj(data, 'urlHlsAes128', 'urlHls') if m3u8_url: fmts, subs = self._extract_m3u8_formats_and_subtitles( m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x http_url = data.get('url') if formats and http_url and re.search(height_re, http_url): http_url = fix_url(http_url) for m3u8_f in formats[:]: height = m3u8_f.get('height') if not height: continue f = m3u8_f.copy() del f['protocol'] f.update({ 'format_id': m3u8_f['format_id'].replace('hls-', 'http-'), 'url': re.sub(height_re, '-%dp.' 
% height, http_url), }) formats.append(f) else: sources = data.get('sources') or {} for key, format_id in self._QUALITIES: format_url = sources.get(key) if not format_url: continue height = int_or_none(self._search_regex( height_re, format_url, 'height', default=None)) formats.append({ 'format_id': format_id, 'url': fix_url(format_url), 'height': height, }) mpd_url = None if data.get('isLive') else data.get('urlDash') if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')): fmts, subs = self._extract_mpd_formats_and_subtitles( mpd_url, media_id, mpd_id='dash', fatal=False) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) audio_url = data.get('urlAudio') if audio_url: formats.append({ 'format_id': 'audio', 'url': audio_url, 'vcodec': 'none', }) for track in (data.get('tracks') or {}).values(): sub_url = track.get('url') if not sub_url: continue subtitles.setdefault(track.get('lang') or 'fr', []).append({ 'url': sub_url, }) if not formats: fmts, subs = self._get_formats_and_subtitles(url, f'live_{media_id}' if is_live else media_id) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) return { 'id': media_id, 'formats': formats, 'title': title, 'description': strip_or_none(data.get('description')), 'thumbnail': data.get('thumbnail'), 'duration': float_or_none(data.get('realDuration')), 'timestamp': int_or_none(data.get('liveFrom')), 'series': data.get('programLabel'), 'subtitles': subtitles, 'is_live': is_live, '_format_sort_fields': ('res', 'proto'), } �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/redbulltv.py�����������������������������������������������������0000664�0000000�0000000�00000022030�14675634471�0021233�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, float_or_none, ) class RedBullTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?redbull(?:\.tv|\.com(?:/[^/]+)?(?:/tv)?)(?:/events/[^/]+)?/(?:videos?|live|(?:film|episode)s)/(?P<id>AP-\w+)' _TESTS = [{ # film 'url': 'https://www.redbull.tv/video/AP-1Q6XCDTAN1W11', 'md5': 'fb0445b98aa4394e504b413d98031d1f', 'info_dict': { 'id': 'AP-1Q6XCDTAN1W11', 'ext': 'mp4', 'title': 'ABC of... WRC - ABC of... 
S1E6', 'description': 'md5:5c7ed8f4015c8492ecf64b6ab31e7d31', 'duration': 1582.04, }, }, { # episode 'url': 'https://www.redbull.tv/video/AP-1PMHKJFCW1W11', 'info_dict': { 'id': 'AP-1PMHKJFCW1W11', 'ext': 'mp4', 'title': 'Grime - Hashtags S2E4', 'description': 'md5:5546aa612958c08a98faaad4abce484d', 'duration': 904, }, 'params': { 'skip_download': True, }, }, { 'url': 'https://www.redbull.com/int-en/tv/video/AP-1UWHCAR9S1W11/rob-meets-sam-gaze?playlist=playlists::3f81040a-2f31-4832-8e2e-545b1d39d173', 'only_matching': True, }, { 'url': 'https://www.redbull.com/us-en/videos/AP-1YM9QCYE52111', 'only_matching': True, }, { 'url': 'https://www.redbull.com/us-en/events/AP-1XV2K61Q51W11/live/AP-1XUJ86FDH1W11', 'only_matching': True, }, { 'url': 'https://www.redbull.com/int-en/films/AP-1ZSMAW8FH2111', 'only_matching': True, }, { 'url': 'https://www.redbull.com/int-en/episodes/AP-1TQWK7XE11W11', 'only_matching': True, }] def extract_info(self, video_id): session = self._download_json( 'https://api.redbull.tv/v3/session', video_id, note='Downloading access token', query={ 'category': 'personal_computer', 'os_family': 'http', }) if session.get('code') == 'error': raise ExtractorError('{} said: {}'.format( self.IE_NAME, session['message'])) token = session['token'] try: video = self._download_json( 'https://api.redbull.tv/v3/products/' + video_id, video_id, note='Downloading video information', headers={'Authorization': token}, ) except ExtractorError as e: if isinstance(e.cause, HTTPError) and e.cause.status == 404: error_message = self._parse_json( e.cause.response.read().decode(), video_id)['error'] raise ExtractorError(f'{self.IE_NAME} said: {error_message}', expected=True) raise title = video['title'].strip() formats, subtitles = self._extract_m3u8_formats_and_subtitles( f'https://dms.redbull.tv/v3/{video_id}/{token}/playlist.m3u8', video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') for resource in video.get('resources', []): if resource.startswith('closed_caption_'): splitted_resource = resource.split('_') if splitted_resource[2]: subtitles.setdefault('en', []).append({ 'url': f'https://resources.redbull.tv/{video_id}/{resource}', 'ext': splitted_resource[2], }) subheading = video.get('subheading') if subheading: title += f' - {subheading}' return { 'id': video_id, 'title': title, 'description': video.get('long_description') or video.get( 'short_description'), 'duration': float_or_none(video.get('duration'), scale=1000), 'formats': formats, 'subtitles': subtitles, } def _real_extract(self, url): video_id = self._match_id(url) return self.extract_info(video_id) class RedBullEmbedIE(RedBullTVIE): # XXX: Do not subclass from concrete IE _VALID_URL = r'https?://(?:www\.)?redbull\.com/embed/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12}:[a-z]{2}-[A-Z]{2,3})' _TESTS = [{ # HLS manifest accessible only using assetId 'url': 'https://www.redbull.com/embed/rrn:content:episode-videos:f3021f4f-3ed4-51ac-915a-11987126e405:en-INT', 'only_matching': True, }] _VIDEO_ESSENSE_TMPL = '''... 
on %s { videoEssence { attributes } }''' def _real_extract(self, url): rrn_id = self._match_id(url) asset_id = self._download_json( 'https://edge-graphql.crepo-production.redbullaws.com/v1/graphql', rrn_id, headers={ 'Accept': 'application/json', 'API-KEY': 'e90a1ff11335423998b100c929ecc866', }, query={ 'query': '''{ resource(id: "%s", enforceGeoBlocking: false) { %s %s } }''' % (rrn_id, self._VIDEO_ESSENSE_TMPL % 'LiveVideo', self._VIDEO_ESSENSE_TMPL % 'VideoResource'), # noqa: UP031 })['data']['resource']['videoEssence']['attributes']['assetId'] return self.extract_info(asset_id) class RedBullTVRrnContentIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/tv/(?:video|live|film)/(?P<id>rrn:content:[^:]+:[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})' _TESTS = [{ 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:live-videos:e3e6feb4-e95f-50b7-962a-c70f8fd13c73/mens-dh-finals-fort-william', 'only_matching': True, }, { 'url': 'https://www.redbull.com/int-en/tv/video/rrn:content:videos:a36a0f36-ff1b-5db8-a69d-ee11a14bf48b/tn-ts-style?playlist=rrn:content:event-profiles:83f05926-5de8-5389-b5e4-9bb312d715e8:extras', 'only_matching': True, }, { 'url': 'https://www.redbull.com/int-en/tv/film/rrn:content:films:d1f4d00e-4c04-5d19-b510-a805ffa2ab83/follow-me', 'only_matching': True, }] def _real_extract(self, url): region, lang, rrn_id = self._match_valid_url(url).groups() rrn_id += f':{lang}-{region.upper()}' return self.url_result( 'https://www.redbull.com/embed/' + rrn_id, RedBullEmbedIE.ie_key(), rrn_id) class RedBullIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?redbull\.com/(?P<region>[a-z]{2,3})-(?P<lang>[a-z]{2})/(?P<type>(?:episode|film|(?:(?:recap|trailer)-)?video)s|live)/(?!AP-|rrn:content:)(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'https://www.redbull.com/int-en/episodes/grime-hashtags-s02-e04', 'md5': 'db8271a7200d40053a1809ed0dd574ff', 'info_dict': { 'id': 'AA-1MT8DQWA91W14', 'ext': 'mp4', 'title': 'Grime - Hashtags S2E4', 'description': 'md5:5546aa612958c08a98faaad4abce484d', }, }, { 'url': 'https://www.redbull.com/int-en/films/kilimanjaro-mountain-of-greatness', 'only_matching': True, }, { 'url': 'https://www.redbull.com/int-en/recap-videos/uci-mountain-bike-world-cup-2017-mens-xco-finals-from-vallnord', 'only_matching': True, }, { 'url': 'https://www.redbull.com/int-en/trailer-videos/kings-of-content', 'only_matching': True, }, { 'url': 'https://www.redbull.com/int-en/videos/tnts-style-red-bull-dance-your-style-s1-e12', 'only_matching': True, }, { 'url': 'https://www.redbull.com/int-en/live/mens-dh-finals-fort-william', 'only_matching': True, }, { # only available on the int-en website so a fallback is need for the API # https://www.redbull.com/v3/api/graphql/v1/v3/query/en-GB>en-INT?filter[uriSlug]=fia-wrc-saturday-recap-estonia&rb3Schema=v1:hero 'url': 'https://www.redbull.com/gb-en/live/fia-wrc-saturday-recap-estonia', 'only_matching': True, }] _INT_FALLBACK_LIST = ['de', 'en', 'es', 'fr'] _LAT_FALLBACK_MAP = ['ar', 'bo', 'car', 'cl', 'co', 'mx', 'pe'] def _real_extract(self, url): region, lang, filter_type, display_id = self._match_valid_url(url).groups() if filter_type == 'episodes': filter_type = 'episode-videos' elif filter_type == 'live': filter_type = 'live-videos' regions = [region.upper()] if region != 'int': if region in self._LAT_FALLBACK_MAP: regions.append('LAT') if lang in self._INT_FALLBACK_LIST: regions.append('INT') locale = '>'.join([f'{lang}-{reg}' for reg in regions]) rrn_id = 
self._download_json( 'https://www.redbull.com/v3/api/graphql/v1/v3/query/' + locale, display_id, query={ 'filter[type]': filter_type, 'filter[uriSlug]': display_id, 'rb3Schema': 'v1:hero', })['data']['id'] return self.url_result( 'https://www.redbull.com/embed/' + rrn_id, RedBullEmbedIE.ie_key(), rrn_id) ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/reddit.py��������������������������������������������������������0000664�0000000�0000000�00000037766�14675634471�0020531�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import urllib.parse from .common import InfoExtractor from ..utils import ( ExtractorError, float_or_none, int_or_none, parse_qs, traverse_obj, try_get, unescapeHTML, update_url_query, url_or_none, urlencode_postdata, ) class RedditIE(InfoExtractor): _NETRC_MACHINE = 'reddit' _VALID_URL = r'https?://(?P<host>(?:\w+\.)?reddit(?:media)?\.com)/(?P<slug>(?:(?:r|user)/[^/]+/)?comments/(?P<id>[^/?#&]+))' _TESTS = [{ 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj/that_small_heart_attack/', 'info_dict': { 'id': 'zv89llsvexdz', 'ext': 'mp4', 'display_id': '6rrwyj', 'title': 'That small heart attack.', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnails': 'count:4', 'timestamp': 1501941939, 'upload_date': '20170805', 'uploader': 'Antw87', 'duration': 12, 'like_count': int, 'dislike_count': int, 'comment_count': int, 'age_limit': 0, 'channel_id': 'videos', }, 'params': { 'skip_download': True, }, }, { # 1080p fallback format 'url': 'https://www.reddit.com/r/aww/comments/90bu6w/heat_index_was_110_degrees_so_we_offered_him_a/', 'md5': '8b5902cfda3006bf90faea7adf765a49', 'info_dict': { 'id': 'gyh95hiqc0b11', 'ext': 'mp4', 'display_id': '90bu6w', 'title': 'Heat index was 110 degrees so we offered him a cold drink. He went for a full body soak instead', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnails': 'count:7', 'timestamp': 1532051078, 'upload_date': '20180720', 'uploader': 'FootLoosePickleJuice', 'duration': 14, 'like_count': int, 'dislike_count': int, 'comment_count': int, 'age_limit': 0, 'channel_id': 'aww', }, }, { # User post 'url': 'https://www.reddit.com/user/creepyt0es/comments/nip71r/i_plan_to_make_more_stickers_and_prints_check/', 'info_dict': { 'id': 'zasobba6wp071', 'ext': 'mp4', 'display_id': 'nip71r', 'title': 'I plan to make more stickers and prints! Check them out on my Etsy! Or get them through my Patreon. 
Links below.', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'thumbnails': 'count:5', 'timestamp': 1621709093, 'upload_date': '20210522', 'uploader': 'creepyt0es', 'duration': 6, 'like_count': int, 'dislike_count': int, 'comment_count': int, 'age_limit': 18, 'channel_id': 'u_creepyt0es', }, 'params': { 'skip_download': True, }, }, { # videos embedded in reddit text post 'url': 'https://www.reddit.com/r/KamenRider/comments/wzqkxp/finale_kamen_rider_revice_episode_50_family_to/', 'playlist_count': 2, 'info_dict': { 'id': 'wzqkxp', 'title': 'md5:72d3d19402aa11eff5bd32fc96369b37', }, }, { # crossposted reddit-hosted media 'url': 'https://www.reddit.com/r/dumbfuckers_club/comments/zjjw82/cringe/', 'md5': '746180895c7b75a9d6b05341f507699a', 'info_dict': { 'id': 'a1oneun6pa5a1', 'ext': 'mp4', 'display_id': 'zjjw82', 'title': 'Cringe', 'uploader': 'Otaku-senpai69420', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'upload_date': '20221212', 'timestamp': 1670812309, 'duration': 16, 'like_count': int, 'dislike_count': int, 'comment_count': int, 'age_limit': 0, 'channel_id': 'dumbfuckers_club', }, }, { # post link without subreddit 'url': 'https://www.reddit.com/comments/124pp33', 'md5': '15eec9d828adcef4468b741a7e45a395', 'info_dict': { 'id': 'antsenjc2jqa1', 'ext': 'mp4', 'display_id': '124pp33', 'title': 'Harmless prank of some old friends', 'uploader': 'Dudezila', 'channel_id': 'ContagiousLaughter', 'duration': 17, 'upload_date': '20230328', 'timestamp': 1680012043, 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'age_limit': 0, 'comment_count': int, 'dislike_count': int, 'like_count': int, }, }, { # quarantined subreddit post 'url': 'https://old.reddit.com/r/GenZedong/comments/12fujy3/based_hasan/', 'md5': '3156ea69e3c1f1b6259683c5abd36e71', 'info_dict': { 'id': '8bwtclfggpsa1', 'ext': 'mp4', 'display_id': '12fujy3', 'title': 'Based Hasan?', 'uploader': 'KingNigelXLII', 'channel_id': 'GenZedong', 'duration': 16, 'upload_date': '20230408', 'timestamp': 1680979138, 'age_limit': 0, 'comment_count': int, 'dislike_count': int, 'like_count': int, }, 'skip': 'Requires account that has opted-in to the GenZedong subreddit', }, { # subtitles in HLS manifest 'url': 'https://www.reddit.com/r/Unexpected/comments/1cl9h0u/the_insurance_claim_will_be_interesting/', 'info_dict': { 'id': 'a2mdj5d57qyc1', 'ext': 'mp4', 'display_id': '1cl9h0u', 'title': 'The insurance claim will be interesting', 'uploader': 'darrenpauli', 'channel_id': 'Unexpected', 'duration': 53, 'upload_date': '20240506', 'timestamp': 1714966382, 'age_limit': 0, 'comment_count': int, 'dislike_count': int, 'like_count': int, 'subtitles': {'en': 'mincount:1'}, }, 'params': { 'skip_download': True, }, }, { # subtitles from caption-url 'url': 'https://www.reddit.com/r/soccer/comments/1cxwzso/tottenham_1_0_newcastle_united_james_maddison_31/', 'info_dict': { 'id': 'xbmj4t3igy1d1', 'ext': 'mp4', 'display_id': '1cxwzso', 'title': 'Tottenham [1] - 0 Newcastle United - James Maddison 31\'', 'uploader': 'Woodstovia', 'channel_id': 'soccer', 'duration': 30, 'upload_date': '20240522', 'timestamp': 1716373798, 'age_limit': 0, 'comment_count': int, 'dislike_count': int, 'like_count': int, 'subtitles': {'en': 'mincount:1'}, }, 'params': { 'skip_download': True, 'writesubtitles': True, }, }, { 'url': 'https://www.reddit.com/r/videos/comments/6rrwyj', 'only_matching': True, }, { # imgur 'url': 'https://www.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', 'only_matching': True, }, { # imgur @ old reddit 'url': 
'https://old.reddit.com/r/MadeMeSmile/comments/6t7wi5/wait_for_it/', 'only_matching': True, }, { # streamable 'url': 'https://www.reddit.com/r/videos/comments/6t7sg9/comedians_hilarious_joke_about_the_guam_flag/', 'only_matching': True, }, { # youtube 'url': 'https://www.reddit.com/r/videos/comments/6t75wq/southern_man_tries_to_speak_without_an_accent/', 'only_matching': True, }, { # reddit video @ nm reddit 'url': 'https://nm.reddit.com/r/Cricket/comments/8idvby/lousy_cameraman_finds_himself_in_cairns_line_of/', 'only_matching': True, }, { 'url': 'https://www.redditmedia.com/r/serbia/comments/pu9wbx/ako_vu%C4%8Di%C4%87_izgubi_izbore_ja_%C4%87u_da_crknem/', 'only_matching': True, }] def _perform_login(self, username, password): captcha = self._download_json( 'https://www.reddit.com/api/requires_captcha/login.json', None, 'Checking login requirement')['required'] if captcha: raise ExtractorError('Reddit is requiring captcha before login', expected=True) login = self._download_json( f'https://www.reddit.com/api/login/{username}', None, data=urlencode_postdata({ 'op': 'login-main', 'user': username, 'passwd': password, 'api_type': 'json', }), note='Logging in', errnote='Login request failed') errors = '; '.join(traverse_obj(login, ('json', 'errors', ..., 1))) if errors: raise ExtractorError(f'Unable to login, Reddit API says {errors}', expected=True) elif not traverse_obj(login, ('json', 'data', 'cookie', {str})): raise ExtractorError('Unable to login, no cookie was returned') def _get_subtitles(self, video_id): # Fallback if there were no subtitles provided by DASH or HLS manifests caption_url = f'https://v.redd.it/{video_id}/wh_ben_en.vtt' if self._is_valid_url(caption_url, video_id, item='subtitles'): return {'en': [{'url': caption_url}]} def _real_extract(self, url): host, slug, video_id = self._match_valid_url(url).group('host', 'slug', 'id') data = self._download_json( f'https://{host}/{slug}/.json', video_id, fatal=False, expected_status=403) if not data: fallback_host = 'old.reddit.com' if host != 'old.reddit.com' else 'www.reddit.com' self.to_screen(f'{host} request failed, retrying with {fallback_host}') data = self._download_json( f'https://{fallback_host}/{slug}/.json', video_id, expected_status=403) if traverse_obj(data, 'error') == 403: reason = data.get('reason') if reason == 'quarantined': self.raise_login_required('Quarantined subreddit; an account that has opted in is required') elif reason == 'private': self.raise_login_required('Private subreddit; an account that has been approved is required') else: raise ExtractorError(f'HTTP Error 403 Forbidden; reason given: {reason}') data = data[0]['data']['children'][0]['data'] video_url = data['url'] over_18 = data.get('over_18') if over_18 is True: age_limit = 18 elif over_18 is False: age_limit = 0 else: age_limit = None thumbnails = [] def add_thumbnail(src): if not isinstance(src, dict): return thumbnail_url = url_or_none(src.get('url')) if not thumbnail_url: return thumbnails.append({ 'url': unescapeHTML(thumbnail_url), 'width': int_or_none(src.get('width')), 'height': int_or_none(src.get('height')), 'http_headers': {'Accept': '*/*'}, }) for image in try_get(data, lambda x: x['preview']['images']) or []: if not isinstance(image, dict): continue add_thumbnail(image.get('source')) resolutions = image.get('resolutions') if isinstance(resolutions, list): for resolution in resolutions: add_thumbnail(resolution) info = { 'title': data.get('title'), 'thumbnails': thumbnails, 'timestamp': float_or_none(data.get('created_utc')), 
'uploader': data.get('author'), 'channel_id': data.get('subreddit'), 'like_count': int_or_none(data.get('ups')), 'dislike_count': int_or_none(data.get('downs')), 'comment_count': int_or_none(data.get('num_comments')), 'age_limit': age_limit, } parsed_url = urllib.parse.urlparse(video_url) # Check for embeds in text posts, or else raise to avoid recursing into the same reddit URL if 'reddit.com' in parsed_url.netloc and f'/{video_id}/' in parsed_url.path: entries = [] for media in traverse_obj(data, ('media_metadata', ...), expected_type=dict): if not media.get('id') or media.get('e') != 'RedditVideo': continue formats = [] if media.get('hlsUrl'): formats.extend(self._extract_m3u8_formats( unescapeHTML(media['hlsUrl']), video_id, 'mp4', m3u8_id='hls', fatal=False)) if media.get('dashUrl'): formats.extend(self._extract_mpd_formats( unescapeHTML(media['dashUrl']), video_id, mpd_id='dash', fatal=False)) if formats: entries.append({ 'id': media['id'], 'display_id': video_id, 'formats': formats, **info, }) if entries: return self.playlist_result(entries, video_id, info.get('title')) raise ExtractorError('No media found', expected=True) # Check if media is hosted on reddit: reddit_video = traverse_obj(data, ( (None, ('crosspost_parent_list', ...)), ('secure_media', 'media'), 'reddit_video'), get_all=False) if reddit_video: playlist_urls = [ try_get(reddit_video, lambda x: unescapeHTML(x[y])) for y in ('dash_url', 'hls_url') ] # Update video_id display_id = video_id video_id = self._search_regex( r'https?://v\.redd\.it/(?P<id>[^/?#&]+)', reddit_video['fallback_url'], 'video_id', default=display_id) dash_playlist_url = playlist_urls[0] or f'https://v.redd.it/{video_id}/DASHPlaylist.mpd' hls_playlist_url = playlist_urls[1] or f'https://v.redd.it/{video_id}/HLSPlaylist.m3u8' qs = traverse_obj(parse_qs(hls_playlist_url), { 'f': ('f', 0, {lambda x: ','.join([x, 'subsAll']) if x else 'hd,subsAll'}), }) hls_playlist_url = update_url_query(hls_playlist_url, qs) formats = [{ 'url': unescapeHTML(reddit_video['fallback_url']), 'height': int_or_none(reddit_video.get('height')), 'width': int_or_none(reddit_video.get('width')), 'tbr': int_or_none(reddit_video.get('bitrate_kbps')), 'acodec': 'none', 'vcodec': 'h264', 'ext': 'mp4', 'format_id': 'fallback', 'format_note': 'DASH video, mp4_dash', }] hls_fmts, subtitles = self._extract_m3u8_formats_and_subtitles( hls_playlist_url, display_id, 'mp4', m3u8_id='hls', fatal=False) formats.extend(hls_fmts) dash_fmts, dash_subs = self._extract_mpd_formats_and_subtitles( dash_playlist_url, display_id, mpd_id='dash', fatal=False) formats.extend(dash_fmts) self._merge_subtitles(dash_subs, target=subtitles) return { **info, 'id': video_id, 'display_id': display_id, 'formats': formats, 'subtitles': subtitles or self.extract_subtitles(video_id), 'duration': int_or_none(reddit_video.get('duration')), } if parsed_url.netloc == 'v.redd.it': self.raise_no_formats('This video is processing', expected=True, video_id=video_id) return { **info, 'id': parsed_url.path.split('/')[1], 'display_id': video_id, } # Not hosted on reddit, must continue extraction return { **info, 'display_id': video_id, '_type': 'url_transparent', 'url': video_url, } 
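# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the extractor): RedditIE above rewrites the
# v.redd.it HLS playlist URL so the CDN also returns subtitle renditions, by
# appending "subsAll" to the `f` query parameter (defaulting to "hd,subsAll"
# when `f` is absent). A stdlib-only equivalent of that query rewrite, with a
# hypothetical function name, could look like this:
from urllib.parse import parse_qs, urlencode, urlsplit, urlunsplit


def _with_subs_all(hls_url):  # hypothetical helper
    parts = urlsplit(hls_url)
    query = parse_qs(parts.query)
    current = query.get('f', [''])[0]
    query['f'] = [f'{current},subsAll' if current else 'hd,subsAll']
    return urlunsplit(parts._replace(query=urlencode(query, doseq=True)))


# _with_subs_all('https://v.redd.it/abc/HLSPlaylist.m3u8?f=hd')
#   -> 'https://v.redd.it/abc/HLSPlaylist.m3u8?f=hd%2CsubsAll'
# (the comma is percent-encoded by urlencode, just as update_url_query does)
# ---------------------------------------------------------------------------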
����������yt-dlp-2024.09.27/yt_dlp/extractor/redge.py���������������������������������������������������������0000664�0000000�0000000�00000012163�14675634471�0020324�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import functools from .common import InfoExtractor from ..networking import HEADRequest from ..utils import ( float_or_none, int_or_none, join_nonempty, parse_qs, update_url_query, ) from ..utils.traversal import traverse_obj class RedCDNLivxIE(InfoExtractor): _VALID_URL = r'https?://[^.]+\.(?:dcs\.redcdn|atmcdn)\.pl/(?:live(?:dash|hls|ss)|nvr)/o2/(?P<tenant>[^/?#]+)/(?P<id>[^?#]+)\.livx' IE_NAME = 'redcdnlivx' _TESTS = [{ 'url': 'https://r.dcs.redcdn.pl/livedash/o2/senat/ENC02/channel.livx?indexMode=true&startTime=638272860000&stopTime=638292544000', 'info_dict': { 'id': 'ENC02-638272860000-638292544000', 'ext': 'mp4', 'title': 'ENC02', 'duration': 19683.982, 'live_status': 'was_live', }, }, { 'url': 'https://r.dcs.redcdn.pl/livedash/o2/sejm/ENC18/live.livx?indexMode=true&startTime=722333096000&stopTime=722335562000', 'info_dict': { 'id': 'ENC18-722333096000-722335562000', 'ext': 'mp4', 'title': 'ENC18', 'duration': 2463.995, 'live_status': 'was_live', }, }, { 'url': 'https://r.dcs.redcdn.pl/livehls/o2/sportevolution/live/triathlon2018/warsaw.livx/playlist.m3u8?startTime=550305000000&stopTime=550327620000', 'info_dict': { 'id': 'triathlon2018-warsaw-550305000000-550327620000', 'ext': 'mp4', 'title': 'triathlon2018/warsaw', 'duration': 22619.98, 'live_status': 'was_live', }, }, { 'url': 'https://n-25-12.dcs.redcdn.pl/nvr/o2/sejm/Migacz-ENC01/1.livx?startTime=722347200000&stopTime=722367345000', 'only_matching': True, }, { 'url': 'https://redir.atmcdn.pl/nvr/o2/sejm/ENC08/1.livx?startTime=503831270000&stopTime=503840040000', 'only_matching': True, }] ''' Known methods (first in url path): - `livedash` - DASH MPD - `livehls` - HTTP Live Streaming - `livess` - IIS Smooth Streaming - `nvr` - CCTV mode, directly returns a file, typically flv, avc1, aac - `sc` - shoutcast/icecast (audio streams, like radio) ''' def _real_extract(self, url): tenant, path = self._match_valid_url(url).group('tenant', 'id') qs = parse_qs(url) start_time = traverse_obj(qs, ('startTime', 0, {int_or_none})) stop_time = traverse_obj(qs, ('stopTime', 0, {int_or_none})) def livx_mode(mode): suffix = '' if mode == 'livess': suffix = '/manifest' elif mode == 'livehls': suffix = '/playlist.m3u8' file_qs = {} if start_time: file_qs['startTime'] = start_time if stop_time: file_qs['stopTime'] = stop_time if mode == 'nvr': file_qs['nolimit'] = 1 elif mode != 'sc': file_qs['indexMode'] = 'true' return update_url_query(f'https://r.dcs.redcdn.pl/{mode}/o2/{tenant}/{path}.livx{suffix}', file_qs) # no id or title for a transmission. making ones up. 
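# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the extractor): the livx_mode() closure
# above expands one (tenant, path, time window) triple into per-protocol
# manifest URLs from a single template. Flattened into a standalone helper for
# the four modes the extractor actually requests (the shoutcast `sc` mode from
# the docstring takes no indexMode and is omitted); the function name is an
# assumption:
from urllib.parse import urlencode


def _livx_urls_sketch(tenant, path, start_time, stop_time):  # hypothetical helper
    urls = {}
    for mode, suffix in (
            ('livedash', ''), ('livehls', '/playlist.m3u8'),
            ('livess', '/manifest'), ('nvr', '')):
        query = {'startTime': start_time, 'stopTime': stop_time}
        if mode == 'nvr':
            query['nolimit'] = 1  # CCTV mode returns the file directly
        else:
            query['indexMode'] = 'true'
        urls[mode] = f'https://r.dcs.redcdn.pl/{mode}/o2/{tenant}/{path}.livx{suffix}?{urlencode(query)}'
    return urls
# (the extractor's own id/title synthesis continues below)
# ---------------------------------------------------------------------------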
title = path \ .replace('/live', '').replace('live/', '') \ .replace('/channel', '').replace('channel/', '') \ .strip('/') video_id = join_nonempty(title.replace('/', '-'), start_time, stop_time) formats = [] # downloading the manifest separately here instead of _extract_ism_formats to also get some stream metadata ism_res = self._download_xml_handle( livx_mode('livess'), video_id, note='Downloading ISM manifest', errnote='Failed to download ISM manifest', fatal=False) ism_doc = None if ism_res is not False: ism_doc, ism_urlh = ism_res formats, _ = self._parse_ism_formats_and_subtitles(ism_doc, ism_urlh.url, 'ss') nvr_urlh = self._request_webpage( HEADRequest(livx_mode('nvr')), video_id, 'Follow flv file redirect', fatal=False, expected_status=lambda _: True) if nvr_urlh and nvr_urlh.status == 200: formats.append({ 'url': nvr_urlh.url, 'ext': 'flv', 'format_id': 'direct-0', 'preference': -1, # might be slow }) formats.extend(self._extract_mpd_formats(livx_mode('livedash'), video_id, mpd_id='dash', fatal=False)) formats.extend(self._extract_m3u8_formats( livx_mode('livehls'), video_id, m3u8_id='hls', ext='mp4', fatal=False)) time_scale = traverse_obj(ism_doc, ('@TimeScale', {int_or_none})) or 10000000 duration = traverse_obj( ism_doc, ('@Duration', {functools.partial(float_or_none, scale=time_scale)})) or None live_status = None if traverse_obj(ism_doc, '@IsLive') == 'TRUE': live_status = 'is_live' elif duration: live_status = 'was_live' return { 'id': video_id, 'title': title, 'formats': formats, 'duration': duration, 'live_status': live_status, } �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/redgifs.py�������������������������������������������������������0000664�0000000�0000000�00000022452�14675634471�0020663�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import functools import urllib.parse from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, OnDemandPagedList, int_or_none, qualities, try_get, ) class RedGifsBaseInfoExtractor(InfoExtractor): _FORMATS = { 'gif': 250, 'sd': 480, 'hd': None, } _API_HEADERS = { 'referer': 'https://www.redgifs.com/', 'origin': 'https://www.redgifs.com', 'content-type': 'application/json', } def _parse_gif_data(self, gif_data): video_id = gif_data.get('id') quality = qualities(tuple(self._FORMATS.keys())) orig_height = int_or_none(gif_data.get('height')) aspect_ratio = try_get(gif_data, lambda x: orig_height / x['width']) formats = [] for format_id, height in self._FORMATS.items(): video_url = gif_data['urls'].get(format_id) if not video_url: continue height = min(orig_height, height or orig_height) formats.append({ 'url': video_url, 'format_id': format_id, 'width': height * aspect_ratio if aspect_ratio else None, 'height': height, 'quality': quality(format_id), }) return { 'id': video_id, 'webpage_url': 
f'https://redgifs.com/watch/{video_id}', 'extractor_key': RedGifsIE.ie_key(), 'extractor': 'RedGifs', 'title': ' '.join(gif_data.get('tags') or []) or 'RedGifs', 'timestamp': int_or_none(gif_data.get('createDate')), 'uploader': gif_data.get('userName'), 'duration': int_or_none(gif_data.get('duration')), 'view_count': int_or_none(gif_data.get('views')), 'like_count': int_or_none(gif_data.get('likes')), 'categories': gif_data.get('tags') or [], 'tags': gif_data.get('tags'), 'age_limit': 18, 'formats': formats, } def _fetch_oauth_token(self, video_id): # https://github.com/Redgifs/api/wiki/Temporary-tokens auth = self._download_json('https://api.redgifs.com/v2/auth/temporary', video_id, note='Fetching temporary token') if not auth.get('token'): raise ExtractorError('Unable to get temporary token') self._API_HEADERS['authorization'] = f'Bearer {auth["token"]}' def _call_api(self, ep, video_id, **kwargs): for first_attempt in True, False: if 'authorization' not in self._API_HEADERS: self._fetch_oauth_token(video_id) try: headers = dict(self._API_HEADERS) headers['x-customheader'] = f'https://www.redgifs.com/watch/{video_id}' data = self._download_json( f'https://api.redgifs.com/v2/{ep}', video_id, headers=headers, **kwargs) break except ExtractorError as e: if first_attempt and isinstance(e.cause, HTTPError) and e.cause.status == 401: del self._API_HEADERS['authorization'] # refresh the token continue raise if 'error' in data: raise ExtractorError(f'RedGifs said: {data["error"]}', expected=True, video_id=video_id) return data def _fetch_page(self, ep, video_id, query, page): query['page'] = page + 1 data = self._call_api( ep, video_id, query=query, note=f'Downloading JSON metadata page {page + 1}') for entry in data['gifs']: yield self._parse_gif_data(entry) def _prepare_api_query(self, query, fields): api_query = [ (field_name, query.get(field_name, (default,))[0]) for field_name, default in fields.items()] return {key: val for key, val in api_query if val is not None} def _paged_entries(self, ep, item_id, query, fields): page = int_or_none(query.get('page', (None,))[0]) page_fetcher = functools.partial( self._fetch_page, ep, item_id, self._prepare_api_query(query, fields)) return page_fetcher(page) if page else OnDemandPagedList(page_fetcher, self._PAGE_SIZE) class RedGifsIE(RedGifsBaseInfoExtractor): _VALID_URL = r'https?://(?:(?:www\.)?redgifs\.com/watch/|thumbs2\.redgifs\.com/)(?P<id>[^-/?#\.]+)' _TESTS = [{ 'url': 'https://www.redgifs.com/watch/squeakyhelplesswisent', 'info_dict': { 'id': 'squeakyhelplesswisent', 'ext': 'mp4', 'title': 'Hotwife Legs Thick', 'timestamp': 1636287915, 'upload_date': '20211107', 'uploader': 'ignored52', 'duration': 16, 'view_count': int, 'like_count': int, 'categories': list, 'age_limit': 18, 'tags': list, }, }, { 'url': 'https://thumbs2.redgifs.com/SqueakyHelplessWisent-mobile.mp4#t=0', 'info_dict': { 'id': 'squeakyhelplesswisent', 'ext': 'mp4', 'title': 'Hotwife Legs Thick', 'timestamp': 1636287915, 'upload_date': '20211107', 'uploader': 'ignored52', 'duration': 16, 'view_count': int, 'like_count': int, 'categories': list, 'age_limit': 18, 'tags': list, }, }] def _real_extract(self, url): video_id = self._match_id(url).lower() video_info = self._call_api( f'gifs/{video_id}?views=yes', video_id, note='Downloading video info') return self._parse_gif_data(video_info['gif']) class RedGifsSearchIE(RedGifsBaseInfoExtractor): IE_DESC = 'Redgifs search' _VALID_URL = r'https?://(?:www\.)?redgifs\.com/browse\?(?P<query>[^#]+)' _PAGE_SIZE = 80 _TESTS = [ { 'url': 
'https://www.redgifs.com/browse?tags=Lesbian', 'info_dict': { 'id': 'tags=Lesbian', 'title': 'Lesbian', 'description': 'RedGifs search for Lesbian, ordered by trending', }, 'playlist_mincount': 100, }, { 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian', 'info_dict': { 'id': 'type=g&order=latest&tags=Lesbian', 'title': 'Lesbian', 'description': 'RedGifs search for Lesbian, ordered by latest', }, 'playlist_mincount': 100, }, { 'url': 'https://www.redgifs.com/browse?type=g&order=latest&tags=Lesbian&page=2', 'info_dict': { 'id': 'type=g&order=latest&tags=Lesbian&page=2', 'title': 'Lesbian', 'description': 'RedGifs search for Lesbian, ordered by latest', }, 'playlist_count': 80, }, ] def _real_extract(self, url): query_str = self._match_valid_url(url).group('query') query = urllib.parse.parse_qs(query_str) if not query.get('tags'): raise ExtractorError('Invalid query tags', expected=True) tags = query.get('tags')[0] order = query.get('order', ('trending',))[0] query['search_text'] = [tags] entries = self._paged_entries('gifs/search', query_str, query, { 'search_text': None, 'order': 'trending', 'type': None, }) return self.playlist_result( entries, query_str, tags, f'RedGifs search for {tags}, ordered by {order}') class RedGifsUserIE(RedGifsBaseInfoExtractor): IE_DESC = 'Redgifs user' _VALID_URL = r'https?://(?:www\.)?redgifs\.com/users/(?P<username>[^/?#]+)(?:\?(?P<query>[^#]+))?' _PAGE_SIZE = 30 _TESTS = [ { 'url': 'https://www.redgifs.com/users/lamsinka89', 'info_dict': { 'id': 'lamsinka89', 'title': 'lamsinka89', 'description': 'RedGifs user lamsinka89, ordered by recent', }, 'playlist_mincount': 100, }, { 'url': 'https://www.redgifs.com/users/lamsinka89?page=3', 'info_dict': { 'id': 'lamsinka89?page=3', 'title': 'lamsinka89', 'description': 'RedGifs user lamsinka89, ordered by recent', }, 'playlist_count': 30, }, { 'url': 'https://www.redgifs.com/users/lamsinka89?order=best&type=g', 'info_dict': { 'id': 'lamsinka89?order=best&type=g', 'title': 'lamsinka89', 'description': 'RedGifs user lamsinka89, ordered by best', }, 'playlist_mincount': 100, }, ] def _real_extract(self, url): username, query_str = self._match_valid_url(url).group('username', 'query') playlist_id = f'{username}?{query_str}' if query_str else username query = urllib.parse.parse_qs(query_str) order = query.get('order', ('recent',))[0] entries = self._paged_entries(f'users/{username}/search', playlist_id, query, { 'order': 'recent', 'type': None, }) return self.playlist_result( entries, playlist_id, username, f'RedGifs user {username}, ordered by {order}') ����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/redtube.py�������������������������������������������������������0000664�0000000�0000000�00000014101�14675634471�0020662�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( ExtractorError, determine_ext, int_or_none, merge_dicts, str_to_int, unified_strdate, url_or_none, urljoin, ) class RedTubeIE(InfoExtractor): _VALID_URL = 
r'https?://(?:(?:\w+\.)?redtube\.com(?:\.br)?/|embed\.redtube\.com/\?.*?\bid=)(?P<id>[0-9]+)' _EMBED_REGEX = [r'<iframe[^>]+?src=["\'](?P<url>(?:https?:)?//embed\.redtube\.com/\?.*?\bid=\d+)'] _TESTS = [{ 'url': 'https://www.redtube.com/38864951', 'md5': '4fba70cbca3aefd25767ab4b523c9878', 'info_dict': { 'id': '38864951', 'ext': 'mp4', 'title': 'Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu', 'description': 'Watch video Public Sex on the Balcony in Freezing Paris! Amateur Couple LeoLulu on Redtube, home of free Blowjob porn videos and Blonde sex movies online. Video length: (10:46) - Uploaded by leolulu - Verified User - Starring Pornstar: Leolulu', 'upload_date': '20210111', 'timestamp': 1610343109, 'duration': 646, 'view_count': int, 'age_limit': 18, 'thumbnail': r're:https://\wi-ph\.rdtcdn\.com/videos/.+/.+\.jpg', }, }, { 'url': 'http://embed.redtube.com/?bgcolor=000000&id=1443286', 'only_matching': True, }, { 'url': 'http://it.redtube.com/66418', 'only_matching': True, }, { 'url': 'https://www.redtube.com.br/103224331', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage( f'https://www.redtube.com/{video_id}', video_id) ERRORS = ( (('video-deleted-info', '>This video has been removed'), 'has been removed'), (('private_video_text', '>This video is private', '>Send a friend request to its owner to be able to view it'), 'is private'), ) for patterns, message in ERRORS: if any(p in webpage for p in patterns): raise ExtractorError( f'Video {video_id} {message}', expected=True) info = self._search_json_ld(webpage, video_id, default={}) if not info.get('title'): info['title'] = self._html_search_regex( (r'<h(\d)[^>]+class="(?:video_title_text|videoTitle|video_title)[^"]*">(?P<title>(?:(?!\1).)+)</h\1>', r'(?:videoTitle|title)\s*:\s*(["\'])(?P<title>(?:(?!\1).)+)\1'), webpage, 'title', group='title', default=None) or self._og_search_title(webpage) formats = [] sources = self._parse_json( self._search_regex( r'sources\s*:\s*({.+?})', webpage, 'source', default='{}'), video_id, fatal=False) if sources and isinstance(sources, dict): for format_id, format_url in sources.items(): if format_url: formats.append({ 'url': format_url, 'format_id': format_id, 'height': int_or_none(format_id), }) medias = self._parse_json( self._search_regex( r'mediaDefinition["\']?\s*:\s*(\[.+?}\s*\])', webpage, 'media definitions', default='{}'), video_id, fatal=False) for media in medias if isinstance(medias, list) else []: format_url = urljoin('https://www.redtube.com', media.get('videoUrl')) if not format_url: continue format_id = media.get('format') quality = media.get('quality') if format_id == 'hls' or (format_id == 'mp4' and not quality): more_media = self._download_json(format_url, video_id, fatal=False) else: more_media = [media] for media in more_media if isinstance(more_media, list) else []: format_url = url_or_none(media.get('videoUrl')) if not format_url: continue format_id = media.get('format') if format_id == 'hls' or determine_ext(format_url) == 'm3u8': formats.extend(self._extract_m3u8_formats( format_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=format_id or 'hls', fatal=False)) continue format_id = media.get('quality') formats.append({ 'url': format_url, 'ext': 'mp4', 'format_id': format_id, 'height': int_or_none(format_id), }) if not formats: video_url = self._html_search_regex( r'<source src="(.+?)" type="video/mp4">', webpage, 'video URL') formats.append({'url': video_url, 'ext': 'mp4'}) 
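# How the mediaDefinition indirection above resolves, sketched with
# hypothetical data for illustration: a stub entry such as
#     {'format': 'hls', 'videoUrl': '/media/hls/12345.json'}
# (format 'hls', or 'mp4' without a 'quality' field) has its videoUrl fetched
# again as JSON, which yields the concrete definitions, e.g.
#     [{'format': 'hls', 'videoUrl': 'https://.../master.m3u8'}]
# The inner loop then turns those into m3u8 or direct-mp4 formats; every
# other entry is treated as already concrete and wrapped in a one-element
# list so both cases flow through the same per-entry logic.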
thumbnail = self._og_search_thumbnail(webpage) upload_date = unified_strdate(self._search_regex( r'<span[^>]+>(?:ADDED|Published on) ([^<]+)<', webpage, 'upload date', default=None)) duration = int_or_none(self._og_search_property( 'video:duration', webpage, default=None) or self._search_regex( r'videoDuration\s*:\s*(\d+)', webpage, 'duration', default=None)) view_count = str_to_int(self._search_regex( (r'<div[^>]*>Views</div>\s*<div[^>]*>\s*([\d,.]+)', r'<span[^>]*>VIEWS</span>\s*</td>\s*<td>\s*([\d,.]+)', r'<span[^>]+\bclass=["\']video_view_count[^>]*>\s*([\d,.]+)'), webpage, 'view count', default=None)) # No self-labeling, but they describe themselves as # "Home of Videos Porno" age_limit = 18 return merge_dicts(info, { 'id': video_id, 'ext': 'mp4', 'thumbnail': thumbnail, 'upload_date': upload_date, 'duration': duration, 'view_count': view_count, 'age_limit': age_limit, 'formats': formats, }) ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/rentv.py���������������������������������������������������������0000664�0000000�0000000�00000007744�14675634471�0020405�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( determine_ext, int_or_none, url_or_none, ) class RENTVIE(InfoExtractor): _WORKING = False _VALID_URL = r'(?:rentv:|https?://(?:www\.)?ren\.tv/(?:player|video/epizod)/)(?P<id>\d+)' _TESTS = [{ 'url': 'http://ren.tv/video/epizod/118577', 'md5': 'd91851bf9af73c0ad9b2cdf76c127fbb', 'info_dict': { 'id': '118577', 'ext': 'mp4', 'title': 'Документальный спецпроект: "Промывка мозгов. 
Технологии XXI века"', 'timestamp': 1472230800, 'upload_date': '20160826', }, }, { 'url': 'http://ren.tv/player/118577', 'only_matching': True, }, { 'url': 'rentv:118577', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage('http://ren.tv/player/' + video_id, video_id) config = self._parse_json(self._search_regex( r'config\s*=\s*({.+})\s*;', webpage, 'config'), video_id) title = config['title'] formats = [] for video in config['src']: src = url_or_none(video.get('src')) if not src: continue ext = determine_ext(src) if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( src, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) else: formats.append({ 'url': src, }) return { 'id': video_id, 'title': title, 'description': config.get('description'), 'thumbnail': config.get('image'), 'duration': int_or_none(config.get('duration')), 'timestamp': int_or_none(config.get('date')), 'formats': formats, } class RENTVArticleIE(InfoExtractor): _WORKING = False _VALID_URL = r'https?://(?:www\.)?ren\.tv/novosti/\d{4}-\d{2}-\d{2}/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'http://ren.tv/novosti/2016-10-26/video-mikroavtobus-popavshiy-v-dtp-s-gruzovikami-v-podmoskove-prevratilsya-v', 'md5': 'ebd63c4680b167693745ab91343df1d6', 'info_dict': { 'id': '136472', 'ext': 'mp4', 'title': 'Видео: микроавтобус, попавший в ДТП с грузовиками в Подмосковье, превратился в груду металла', 'description': 'Жертвами столкновения двух фур и микроавтобуса, по последним данным, стали семь человек.', }, }, { # TODO: invalid m3u8 'url': 'http://ren.tv/novosti/2015-09-25/sluchaynyy-prohozhiy-poymal-avtougonshchika-v-murmanske-video', 'info_dict': { 'id': 'playlist', 'ext': 'mp4', 'title': 'Случайный прохожий поймал автоугонщика в Мурманске. 
ВИДЕО | РЕН ТВ', 'uploader': 'ren.tv', }, 'params': { # m3u8 downloads 'skip_download': True, }, 'skip': True, }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) drupal_settings = self._parse_json(self._search_regex( r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'), display_id) entries = [] for config_profile in drupal_settings.get('ren_jwplayer', {}).values(): media_id = config_profile.get('mediaid') if not media_id: continue media_id = str(media_id) entries.append(self.url_result('rentv:' + media_id, 'RENTV', media_id)) return self.playlist_result(entries, display_id) ����������������������������yt-dlp-2024.09.27/yt_dlp/extractor/restudy.py�������������������������������������������������������0000664�0000000�0000000�00000002376�14675634471�0020742�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor class RestudyIE(InfoExtractor): _WORKING = False _VALID_URL = r'https?://(?:(?:www|portal)\.)?restudy\.dk/video/[^/]+/id/(?P<id>[0-9]+)' _TESTS = [{ 'url': 'https://www.restudy.dk/video/play/id/1637', 'info_dict': { 'id': '1637', 'ext': 'flv', 'title': 'Leiden-frosteffekt', 'description': 'Denne video er et eksperiment med flydende kvælstof.', }, 'params': { # rtmp download 'skip_download': True, }, }, { 'url': 'https://portal.restudy.dk/video/leiden-frosteffekt/id/1637', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) title = self._og_search_title(webpage).strip() description = self._og_search_description(webpage).strip() formats = self._extract_smil_formats( f'https://cdn.portal.restudy.dk/dynamic/themes/front/awsmedia/SmilDirectory/video_{video_id}.xml', video_id) return { 'id': video_id, 'title': title, 'description': description, 'formats': formats, } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/reuters.py�������������������������������������������������������0000664�0000000�0000000�00000004465�14675634471�0020735�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re from .common import InfoExtractor from ..utils import ( int_or_none, js_to_json, unescapeHTML, ) class ReutersIE(InfoExtractor): _WORKING = False _VALID_URL = r'https?://(?:www\.)?reuters\.com/.*?\?.*?videoId=(?P<id>[0-9]+)' _TEST = { 'url': 'http://www.reuters.com/video/2016/05/20/san-francisco-police-chief-resigns?videoId=368575562', 'md5': '8015113643a0b12838f160b0b81cc2ee', 'info_dict': { 'id': '368575562', 'ext': 'mp4', 'title': 'San Francisco police chief resigns', }, } def _real_extract(self, url): video_id = self._match_id(url) webpage = 
self._download_webpage( f'http://www.reuters.com/assets/iframe/yovideo?videoId={video_id}', video_id) video_data = js_to_json(self._search_regex( r'(?s)Reuters\.yovideo\.drawPlayer\(({.*?})\);', webpage, 'video data')) def get_json_value(key, fatal=False): return self._search_regex(rf'"{key}"\s*:\s*"([^"]+)"', video_data, key, fatal=fatal) title = unescapeHTML(get_json_value('title', fatal=True)) mmid, fid = re.search(r',/(\d+)\?f=(\d+)', get_json_value('flv', fatal=True)).groups() mas_data = self._download_json( f'http://mas-e.cds1.yospace.com/mas/{mmid}/{fid}?trans=json', video_id, transform_source=js_to_json) formats = [] for f in mas_data: f_url = f.get('url') if not f_url: continue method = f.get('method') if method == 'hls': formats.extend(self._extract_m3u8_formats( f_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls', fatal=False)) else: container = f.get('container') ext = '3gp' if method == 'mobile' else container formats.append({ 'format_id': ext, 'url': f_url, 'ext': ext, 'container': container if method != 'mobile' else None, }) return { 'id': video_id, 'title': title, 'thumbnail': get_json_value('thumb'), 'duration': int_or_none(get_json_value('seconds')), 'formats': formats, } �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/reverbnation.py��������������������������������������������������0000664�0000000�0000000�00000003057�14675634471�0021736�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( qualities, str_or_none, ) class ReverbNationIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?reverbnation\.com/.*?/song/(?P<id>\d+).*?$' _TESTS = [{ 'url': 'http://www.reverbnation.com/alkilados/song/16965047-mona-lisa', 'md5': 'c0aaf339bcee189495fdf5a8c8ba8645', 'info_dict': { 'id': '16965047', 'ext': 'mp3', 'title': 'MONA LISA', 'uploader': 'ALKILADOS', 'uploader_id': '216429', 'thumbnail': r're:^https?://.*\.jpg', }, }] def _real_extract(self, url): song_id = self._match_id(url) api_res = self._download_json( f'https://api.reverbnation.com/song/{song_id}', song_id, note=f'Downloading information of song {song_id}', ) THUMBNAILS = ('thumbnail', 'image') quality = qualities(THUMBNAILS) thumbnails = [] for thumb_key in THUMBNAILS: if api_res.get(thumb_key): thumbnails.append({ 'url': api_res[thumb_key], 'preference': quality(thumb_key), }) return { 'id': song_id, 'title': api_res['name'], 'url': api_res['url'], 'uploader': api_res.get('artist', {}).get('name'), 'uploader_id': str_or_none(api_res.get('artist', {}).get('id')), 'thumbnails': thumbnails, 'ext': 'mp3', 'vcodec': 'none', } 
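# A minimal sketch of how the qualities() helper orders the two THUMBNAILS
# keys above (assuming yt_dlp.utils.qualities, where later entries in the
# given sequence rank higher):
#
#     q = qualities(('thumbnail', 'image'))
#     q('thumbnail')  # -> 0 (lower preference)
#     q('image')      # -> 1 (higher preference)
#
# so when the API returns both, the 'image' variant wins the preference tie-break.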
yt-dlp-2024.09.27/yt_dlp/extractor/rheinmaintv.py from .common import InfoExtractor from ..utils import extract_attributes, merge_dicts, remove_end class RheinMainTVIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rheinmaintv\.de/sendungen/(?:[\w-]+/)*(?P<video_id>(?P<display_id>[\w-]+)/vom-\d{2}\.\d{2}\.\d{4}(?:/\d+)?)' _TESTS = [{ 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/auf-dem-weg-zur-deutschen-meisterschaft/vom-07.11.2022/', 'info_dict': { 'id': 'auf-dem-weg-zur-deutschen-meisterschaft-vom-07.11.2022', 'ext': 'ismv', # ismv+isma will be merged into mp4 'alt_title': 'Auf dem Weg zur Deutschen Meisterschaft', 'title': 'Auf dem Weg zur Deutschen Meisterschaft', 'upload_date': '20221108', 'view_count': int, 'display_id': 'auf-dem-weg-zur-deutschen-meisterschaft', 'thumbnail': r're:^https://.+\.jpg', 'description': 'md5:48c59b74192bc819a9b34af1d5ed1eb9', 'timestamp': 1667933057, 'duration': 243.0, }, 'params': {'skip_download': 'ism'}, }, { 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften/vom-14.11.2022/', 'info_dict': { 'id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften-vom-14.11.2022', 'ext': 'ismv', 'title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften', 'timestamp': 1668526214, 'display_id': 'formationsgemeinschaft-rhein-main-bei-den-deutschen-meisterschaften', 'alt_title': 'Formationsgemeinschaft Rhein-Main bei den Deutschen Meisterschaften', 'view_count': int, 'thumbnail': r're:^https://.+\.jpg', 'duration': 345.0, 'description': 'md5:9370ba29526984006c2cba1372e5c5a0', 'upload_date': '20221115', }, 'params': {'skip_download': 'ism'}, }, { 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/casino-mainz-bei-den-deutschen-meisterschaften/vom-14.11.2022/', 'info_dict': { 'id': 'casino-mainz-bei-den-deutschen-meisterschaften-vom-14.11.2022', 'ext': 'ismv', 'title': 'Casino Mainz bei den Deutschen Meisterschaften', 'view_count': int, 'timestamp': 1668527402, 'alt_title': 'Casino Mainz bei den Deutschen Meisterschaften', 'upload_date': '20221115', 'display_id': 'casino-mainz-bei-den-deutschen-meisterschaften', 'duration': 348.0, 'thumbnail': r're:^https://.+\.jpg', 'description': 'md5:70fc1660eeba96da17199e5bdff4c0aa', }, 'params': {'skip_download': 'ism'}, }, { 'url': 'https://www.rheinmaintv.de/sendungen/beitrag-video/bricks4kids/vom-22.06.2022/', 'only_matching': True, }] def _real_extract(self, url): mobj = self._match_valid_url(url) display_id = mobj.group('display_id') video_id = mobj.group('video_id').replace('/', '-')
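# For illustration: the <video_id> group spans '<slug>/vom-<DD.MM.YYYY>', so
# for the first test URL above the replace() yields
# 'auf-dem-weg-zur-deutschen-meisterschaft-vom-07.11.2022' - matching the
# 'id' fields in _TESTS - while <display_id> stays
# 'auf-dem-weg-zur-deutschen-meisterschaft'.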
webpage = self._download_webpage(url, video_id) source, img = self._search_regex(r'(?s)(?P<source><source[^>]*>)(?P<img><img[^>]*>)', webpage, 'video', group=('source', 'img')) source = extract_attributes(source) img = extract_attributes(img) raw_json_ld = list(self._yield_json_ld(webpage, video_id)) json_ld = self._json_ld(raw_json_ld, video_id) json_ld.pop('url', None) ism_manifest_url = ( source.get('src') or next(json_ld.get('embedUrl') for json_ld in raw_json_ld if json_ld.get('@type') == 'VideoObject') ) formats, subtitles = self._extract_ism_formats_and_subtitles(ism_manifest_url, video_id) return merge_dicts({ 'id': video_id, 'display_id': display_id, 'title': self._html_search_regex(r'<h1><span class="title">([^<]*)</span>', webpage, 'headline', default=None) or img.get('title') or json_ld.get('title') or self._og_search_title(webpage) or remove_end(self._html_extract_title(webpage), ' -'), 'alt_title': img.get('alt'), 'description': json_ld.get('description') or self._og_search_description(webpage), 'formats': formats, 'subtitles': subtitles, 'thumbnails': [{'url': img['src']}] if 'src' in img else json_ld.get('thumbnails'), }, json_ld) �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/ridehome.py������������������������������������������������������0000664�0000000�0000000�00000010503�14675634471�0021026�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .art19 import Art19IE from .common import InfoExtractor from ..utils import extract_attributes, get_elements_html_by_class from ..utils.traversal import traverse_obj class RideHomeIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ridehome\.info/show/[\w-]+/(?P<id>[\w-]+)/?(?:$|[?#])' _TESTS = [{ 'url': 'https://www.ridehome.info/show/techmeme-ride-home/thu-1228-will-2024-be-the-year-apple-gets-serious-about-gaming-on-macs/', 'info_dict': { 'id': 'thu-1228-will-2024-be-the-year-apple-gets-serious-about-gaming-on-macs', }, 'playlist_count': 1, 'playlist': [{ 'md5': 'c84ea3cc96950a9ab86fe540f3edc588', 'info_dict': { 'id': '540e5493-9fe6-4c14-a488-dc508d8794b2', 'ext': 'mp3', 'title': 'Thu. 
12/28 – Will 2024 Be The Year Apple Gets Serious About Gaming On Macs?', 'description': 'md5:9dba86ae9b5047a8150eceddeeb629c2', 'series': 'Techmeme Ride Home', 'series_id': '3c30e8f4-ab48-415b-9421-1ae06cd4058b', 'upload_date': '20231228', 'timestamp': 1703780995, 'modified_date': '20231230', 'episode_id': '540e5493-9fe6-4c14-a488-dc508d8794b2', 'modified_timestamp': 1703912404, 'release_date': '20231228', 'release_timestamp': 1703782800, 'duration': 1000.1502, 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$', }, }], }, { 'url': 'https://www.ridehome.info/show/techmeme-ride-home/portfolio-profile-sensel-with-ilyarosenberg/', 'info_dict': { 'id': 'portfolio-profile-sensel-with-ilyarosenberg', }, 'playlist_count': 1, 'playlist': [{ 'md5': 'bf9d6efad221008ce71aea09d5533cf6', 'info_dict': { 'id': '6beed803-b1ef-4536-9fef-c23cf6b4dcac', 'ext': 'mp3', 'title': '(Portfolio Profile) Sensel - With @IlyaRosenberg', 'description': 'md5:e1e4a970bce04290e0ba6f030b0125db', 'series': 'Techmeme Ride Home', 'series_id': '3c30e8f4-ab48-415b-9421-1ae06cd4058b', 'upload_date': '20220108', 'timestamp': 1641656064, 'modified_date': '20230418', 'episode_id': '6beed803-b1ef-4536-9fef-c23cf6b4dcac', 'modified_timestamp': 1681843318, 'release_date': '20220108', 'release_timestamp': 1641672000, 'duration': 2789.38122, 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$', }, }], }, { 'url': 'https://www.ridehome.info/show/spacecasts/big-tech-news-apples-macbook-pro-event/', 'info_dict': { 'id': 'big-tech-news-apples-macbook-pro-event', }, 'playlist_count': 1, 'playlist': [{ 'md5': 'b1428530c6e03904a8271e978007fc05', 'info_dict': { 'id': 'f4780044-6c4b-4ce0-8215-8a86cc66bff7', 'ext': 'mp3', 'title': 'md5:e6c05d44d59b6577a4145ac339de5040', 'description': 'md5:14152f7228c8a301a77e3d6bc891b145', 'series': 'SpaceCasts', 'series_id': '8e3e837d-7fe0-4a23-8e11-894917e07e17', 'upload_date': '20211026', 'timestamp': 1635271450, 'modified_date': '20230502', 'episode_id': 'f4780044-6c4b-4ce0-8215-8a86cc66bff7', 'modified_timestamp': 1683057500, 'release_date': '20211026', 'release_timestamp': 1635272124, 'duration': 2266.30531, 'thumbnail': r're:^https?://content\.production\.cdn\.art19\.com/images/.*\.jpeg$', }, }], }] def _real_extract(self, url): article_id = self._match_id(url) webpage = self._download_webpage(url, article_id) urls = traverse_obj( get_elements_html_by_class('iframeContainer', webpage), (..., {extract_attributes}, lambda k, v: k == 'data-src' and Art19IE.suitable(v))) return self.playlist_from_matches(urls, article_id, ie=Art19IE) ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/rinsefm.py�������������������������������������������������������0000664�0000000�0000000�00000006177�14675634471�0020711�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( MEDIA_EXTENSIONS, determine_ext, parse_iso8601, traverse_obj, url_or_none, ) class RinseFMBaseIE(InfoExtractor): @staticmethod def _parse_entry(entry): return { 
**traverse_obj(entry, { 'id': ('id', {str}), 'title': ('title', {str}), 'url': ('fileUrl', {url_or_none}), 'release_timestamp': ('episodeDate', {parse_iso8601}), 'thumbnail': ('featuredImage', 0, 'filename', {str}, {lambda x: x and f'https://rinse.imgix.net/media/{x}'}), 'webpage_url': ('slug', {str}, {lambda x: x and f'https://rinse.fm/episodes/{x}'}), }), 'vcodec': 'none', 'extractor_key': RinseFMIE.ie_key(), 'extractor': RinseFMIE.IE_NAME, } class RinseFMIE(RinseFMBaseIE): _VALID_URL = r'https?://(?:www\.)?rinse\.fm/episodes/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://rinse.fm/episodes/club-glow-15-12-2023-2000/', 'md5': '76ee0b719315617df42e15e710f46c7b', 'info_dict': { 'id': '1536535', 'ext': 'mp3', 'title': 'Club Glow - 15/12/2023 - 20:00', 'thumbnail': r're:^https://.+\.(?:jpg|JPG)$', 'release_timestamp': 1702598400, 'release_date': '20231215', }, }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) entry = self._search_nextjs_data(webpage, display_id)['props']['pageProps']['entry'] return self._parse_entry(entry) class RinseFMArtistPlaylistIE(RinseFMBaseIE): _VALID_URL = r'https?://(?:www\.)?rinse\.fm/shows/(?P<id>[^/?#]+)' _TESTS = [{ 'url': 'https://rinse.fm/shows/resources/', 'info_dict': { 'id': 'resources', 'title': '[re]sources', 'description': '[re]sources est un label parisien piloté par le DJ et producteur Tommy Kid.', }, 'playlist_mincount': 40, }, { 'url': 'https://rinse.fm/shows/ivy/', 'info_dict': { 'id': 'ivy', 'title': '[IVY]', 'description': 'A dedicated space for DNB/Turbo House and 4x4.', }, 'playlist_mincount': 7, }] def _entries(self, data): for episode in traverse_obj(data, ( 'props', 'pageProps', 'episodes', lambda _, v: determine_ext(v['fileUrl']) in MEDIA_EXTENSIONS.audio), ): yield self._parse_entry(episode) def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) title = self._og_search_title(webpage) or self._html_search_meta('title', webpage) description = self._og_search_description(webpage) or self._html_search_meta( 'description', webpage) data = self._search_nextjs_data(webpage, playlist_id) return self.playlist_result( self._entries(data), playlist_id, title, description=description) �������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/rmcdecouverte.py�������������������������������������������������0000664�0000000�0000000�00000005260�14675634471�0022105�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import urllib.parse from .brightcove import BrightcoveLegacyIE from .common import InfoExtractor from ..utils import smuggle_url class RMCDecouverteIE(InfoExtractor): _VALID_URL = r'https?://rmcdecouverte\.bfmtv\.com/(?:[^?#]*_(?P<id>\d+)|mediaplayer-direct)/?(?:[#?]|$)' _TESTS = [{ 'url': 
'https://rmcdecouverte.bfmtv.com/vestiges-de-guerre_22240/les-bunkers-secrets-domaha-beach_25303/', 'info_dict': { 'id': '6250879771001', 'ext': 'mp4', 'title': 'LES BUNKERS SECRETS D´OMAHA BEACH', 'uploader_id': '1969646226001', 'description': 'md5:aed573ca24abde62a148e0eba909657d', 'timestamp': 1619622984, 'upload_date': '20210428', }, 'params': { 'skip_download': True, }, }, { 'url': 'https://rmcdecouverte.bfmtv.com/wheeler-dealers-occasions-a-saisir/program_2566/', 'info_dict': { 'id': '5983675500001', 'ext': 'mp4', 'title': 'CORVETTE', 'description': 'md5:c1e8295521e45ffebf635d6a7658f506', 'uploader_id': '1969646226001', 'upload_date': '20181226', 'timestamp': 1545861635, }, 'params': { 'skip_download': True, }, 'skip': 'only available for a week', }, { 'url': 'https://rmcdecouverte.bfmtv.com/avions-furtifs-la-technologie-de-lextreme_10598', 'only_matching': True, }, { # The website accepts any URL as long as it has _\d+ at the end 'url': 'https://rmcdecouverte.bfmtv.com/any/thing/can/go/here/_10598', 'only_matching': True, }, { # live, geo restricted, bypassable 'url': 'https://rmcdecouverte.bfmtv.com/mediaplayer-direct/', 'only_matching': True, }] BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/1969646226001/default_default/index.html?videoId=%s' def _real_extract(self, url): mobj = self._match_valid_url(url) display_id = mobj.group('id') or 'direct' webpage = self._download_webpage(url, display_id) brightcove_legacy_url = BrightcoveLegacyIE._extract_brightcove_url(webpage) if brightcove_legacy_url: brightcove_id = urllib.parse.parse_qs(urllib.parse.urlparse( brightcove_legacy_url).query)['@videoPlayer'][0] else: brightcove_id = self._search_regex( r'data-video-id=["\'](\d+)', webpage, 'brightcove id') return self.url_result( smuggle_url( self.BRIGHTCOVE_URL_TEMPLATE % brightcove_id, {'geo_countries': ['FR']}), 'BrightcoveNew', brightcove_id) ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/rockstargames.py�������������������������������������������������0000664�0000000�0000000�00000004160�14675634471�0022101�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( int_or_none, parse_iso8601, ) class RockstarGamesIE(InfoExtractor): _WORKING = False _VALID_URL = r'https?://(?:www\.)?rockstargames\.com/videos(?:/video/|#?/?\?.*\bvideo=)(?P<id>\d+)' _TESTS = [{ 'url': 'https://www.rockstargames.com/videos/video/11544/', 'md5': '03b5caa6e357a4bd50e3143fc03e5733', 'info_dict': { 'id': '11544', 'ext': 'mp4', 'title': 'Further Adventures in Finance and Felony Trailer', 'description': 'md5:6d31f55f30cb101b5476c4a379e324a3', 'thumbnail': r're:^https?://.*\.jpg$', 'timestamp': 1464876000, 'upload_date': '20160602', }, }, { 'url': 'http://www.rockstargames.com/videos#/?video=48', 'only_matching': True, }] def _real_extract(self, url): video_id = self._match_id(url) video = self._download_json( 
'https://www.rockstargames.com/videoplayer/videos/get-video.json', video_id, query={ 'id': video_id, 'locale': 'en_us', })['video'] title = video['title'] formats = [] for v in video['files_processed']['video/mp4']: if not v.get('src'): continue resolution = v.get('resolution') height = int_or_none(self._search_regex( r'^(\d+)[pP]$', resolution or '', 'height', default=None)) formats.append({ 'url': self._proto_relative_url(v['src']), 'format_id': resolution, 'height': height, }) if not formats: youtube_id = video.get('youtube_id') if youtube_id: return self.url_result(youtube_id, 'Youtube') return { 'id': video_id, 'title': title, 'description': video.get('description'), 'thumbnail': self._proto_relative_url(video.get('screencap')), 'timestamp': parse_iso8601(video.get('created')), 'formats': formats, } yt-dlp-2024.09.27/yt_dlp/extractor/rokfin.py import datetime as dt import itertools import json import re import urllib.parse from .common import InfoExtractor, SearchInfoExtractor from ..utils import ( ExtractorError, determine_ext, float_or_none, format_field, int_or_none, str_or_none, traverse_obj, try_get, unescapeHTML, unified_timestamp, url_or_none, urlencode_postdata, ) _API_BASE_URL = 'https://prod-api-v2.production.rokfin.com/api/v2/public/' class RokfinIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?P<id>(?P<type>post|stream)/\d+)' _NETRC_MACHINE = 'rokfin' _AUTH_BASE = 'https://secure.rokfin.com/auth/realms/rokfin-web/protocol/openid-connect' _access_mgmt_tokens = {} # OAuth 2.0: RFC 6749, Sec.
1.4-5 _TESTS = [{ 'url': 'https://www.rokfin.com/post/57548/Mitt-Romneys-Crazy-Solution-To-Climate-Change', 'info_dict': { 'id': 'post/57548', 'ext': 'mp4', 'title': 'Mitt Romney\'s Crazy Solution To Climate Change', 'thumbnail': r're:https://img\.production\.rokfin\.com/.+', 'upload_date': '20211023', 'timestamp': 1634998029, 'channel': 'Jimmy Dore', 'channel_id': '65429', 'channel_url': 'https://rokfin.com/TheJimmyDoreShow', 'availability': 'public', 'live_status': 'not_live', 'dislike_count': int, 'like_count': int, 'duration': 213, }, }, { 'url': 'https://rokfin.com/post/223/Julian-Assange-Arrested-Streaming-In-Real-Time', 'info_dict': { 'id': 'post/223', 'ext': 'mp4', 'title': 'Julian Assange Arrested: Streaming In Real Time', 'thumbnail': r're:https://img\.production\.rokfin\.com/.+', 'upload_date': '20190412', 'timestamp': 1555052644, 'channel': 'Ron Placone', 'channel_id': '10', 'channel_url': 'https://rokfin.com/RonPlacone', 'availability': 'public', 'live_status': 'not_live', 'dislike_count': int, 'like_count': int, 'tags': ['FreeThinkingMedia^', 'RealProgressives^'], }, }, { 'url': 'https://www.rokfin.com/stream/10543/Its-A-Crazy-Mess-Regional-Director-Blows-Whistle-On-Pfizers-Vaccine-Trial-Data', 'info_dict': { 'id': 'stream/10543', 'ext': 'mp4', 'title': '"It\'s A Crazy Mess" Regional Director Blows Whistle On Pfizer\'s Vaccine Trial Data', 'thumbnail': r're:https://img\.production\.rokfin\.com/.+', 'description': 'md5:324ce2d3e3b62e659506409e458b9d8e', 'channel': 'TLAVagabond', 'channel_id': '53856', 'channel_url': 'https://rokfin.com/TLAVagabond', 'availability': 'public', 'is_live': False, 'was_live': True, 'live_status': 'was_live', 'timestamp': 1635874720, 'release_timestamp': 1635874720, 'release_date': '20211102', 'upload_date': '20211102', 'dislike_count': int, 'like_count': int, 'tags': ['FreeThinkingMedia^'], }, }, { 'url': 'https://rokfin.com/post/126703/Brave-New-World--Aldous-Huxley-DEEPDIVE--Chpts-13--Quite-Frankly--Jay-Dyer', 'info_dict': { 'id': 'post/126703', 'ext': 'mp4', 'title': 'Brave New World - Aldous Huxley DEEPDIVE! 
(Chpts 1-3) - Quite Frankly & Jay Dyer', 'thumbnail': r're:https://img\.production\.rokfin\.com/.+', 'channel': 'Jay Dyer', 'channel_id': '186881', 'channel_url': 'https://rokfin.com/jaydyer', 'availability': 'premium_only', 'live_status': 'not_live', 'dislike_count': int, 'like_count': int, 'timestamp': 1678213357, 'upload_date': '20230307', 'tags': ['FreeThinkingMedia^', 'OpenMind^'], 'description': 'md5:cb04e32e68326c9b2b251b297bacff35', 'duration': 3100, }, }, { 'url': 'https://rokfin.com/stream/31332/The-Grayzone-live-on-Nordstream-blame-game', 'info_dict': { 'id': 'stream/31332', 'ext': 'mp4', 'title': 'The Grayzone live on Nordstream blame game', 'thumbnail': r're:https://image\.v\.rokfin\.com/.+', 'channel': 'Max Blumenthal', 'channel_id': '248902', 'channel_url': 'https://rokfin.com/MaxBlumenthal', 'availability': 'premium_only', 'live_status': 'was_live', 'dislike_count': int, 'like_count': int, 'timestamp': 1678475166, 'release_timestamp': 1678475166.0, 'release_date': '20230310', 'upload_date': '20230310', 'tags': ['FreeThinkingMedia^'], }, }] def _real_extract(self, url): video_id, video_type = self._match_valid_url(url).group('id', 'type') metadata = self._download_json_using_access_token(f'{_API_BASE_URL}{video_id}', video_id) scheduled = unified_timestamp(metadata.get('scheduledAt')) live_status = ('was_live' if metadata.get('stoppedAt') else 'is_upcoming' if scheduled else 'is_live' if video_type == 'stream' else 'not_live') video_url = traverse_obj(metadata, 'url', ('content', 'contentUrl'), expected_type=url_or_none) if video_url in (None, 'fake.m3u8'): video_url = format_field(self._search_regex( r'https?://[^/]+/([^/]+)/storyboard.vtt', traverse_obj(metadata, 'timelineUrl', ('content', 'timelineUrl'), expected_type=url_or_none), video_id, default=None), None, 'https://stream.v.rokfin.com/%s.m3u8') formats, subtitles = [{'url': video_url}] if video_url else [], {} if determine_ext(video_url) == 'm3u8': formats, subtitles = self._extract_m3u8_formats_and_subtitles( video_url, video_id, fatal=False, live=live_status == 'is_live') if not formats: if traverse_obj(metadata, 'premiumPlan', 'premium'): self.raise_login_required('This video is only available to premium users', True, method='cookies') elif scheduled: self.raise_no_formats( f'Stream is offline; scheduled for {dt.datetime.fromtimestamp(scheduled).strftime("%Y-%m-%d %H:%M:%S")}', video_id=video_id, expected=True) uploader = traverse_obj(metadata, ('createdBy', 'username'), ('creator', 'username')) timestamp = (scheduled or float_or_none(metadata.get('postedAtMilli'), 1000) or unified_timestamp(metadata.get('creationDateTime'))) return { 'id': video_id, 'formats': formats, 'subtitles': subtitles, 'title': str_or_none(traverse_obj(metadata, 'title', ('content', 'contentTitle'))), 'duration': float_or_none(traverse_obj(metadata, ('content', 'duration'))), 'thumbnail': url_or_none(traverse_obj(metadata, 'thumbnail', ('content', 'thumbnailUrl1'))), 'description': str_or_none(traverse_obj(metadata, 'description', ('content', 'contentDescription'))), 'like_count': int_or_none(metadata.get('likeCount')), 'dislike_count': int_or_none(metadata.get('dislikeCount')), 'channel': str_or_none(traverse_obj(metadata, ('createdBy', 'name'), ('creator', 'name'))), 'channel_id': str_or_none(traverse_obj(metadata, ('createdBy', 'id'), ('creator', 'id'))), 'channel_url': url_or_none(f'https://rokfin.com/{uploader}') if uploader else None, 'timestamp': timestamp, 'release_timestamp': timestamp if live_status != 'not_live' else None, 
'tags': traverse_obj(metadata, ('tags', ..., 'title'), expected_type=str_or_none), 'live_status': live_status, 'availability': self._availability( needs_premium=bool(traverse_obj(metadata, 'premiumPlan', 'premium')), is_private=False, needs_subscription=False, needs_auth=False, is_unlisted=False), # 'comment_count': metadata.get('numComments'), # Data provided by website is wrong '__post_extractor': self.extract_comments(video_id) if video_type == 'post' else None, } def _get_comments(self, video_id): pages_total = None for page_n in itertools.count(): raw_comments = self._download_json( f'{_API_BASE_URL}comment?postId={video_id[5:]}&page={page_n}&size=50', video_id, note=f'Downloading viewer comments page {page_n + 1}{format_field(pages_total, None, " of %s")}', fatal=False) or {} for comment in raw_comments.get('content') or []: yield { 'text': str_or_none(comment.get('comment')), 'author': str_or_none(comment.get('name')), 'id': comment.get('commentId'), 'author_id': comment.get('userId'), 'parent': 'root', 'like_count': int_or_none(comment.get('numLikes')), 'dislike_count': int_or_none(comment.get('numDislikes')), 'timestamp': unified_timestamp(comment.get('postedAt')), } pages_total = int_or_none(raw_comments.get('totalPages')) or None is_last = raw_comments.get('last') if not raw_comments.get('content') or is_last or (page_n > pages_total if pages_total else is_last is not False): return def _perform_login(self, username, password): # https://openid.net/specs/openid-connect-core-1_0.html#CodeFlowAuth (Sec. 3.1) login_page = self._download_webpage( f'{self._AUTH_BASE}/auth?client_id=web&redirect_uri=https%3A%2F%2Frokfin.com%2Ffeed&response_mode=fragment&response_type=code&scope=openid', None, note='loading login page', errnote='error loading login page') authentication_point_url = unescapeHTML(self._search_regex( r'<form\s+[^>]+action\s*=\s*"(https://secure\.rokfin\.com/auth/realms/rokfin-web/login-actions/authenticate\?[^"]+)"', login_page, name='Authentication URL')) resp_body = self._download_webpage( authentication_point_url, None, note='logging in', fatal=False, expected_status=404, data=urlencode_postdata({'username': username, 'password': password, 'rememberMe': 'off', 'credentialId': ''})) if not self._authentication_active(): if re.search(r'(?i)(invalid\s+username\s+or\s+password)', resp_body or ''): raise ExtractorError('invalid username/password', expected=True) raise ExtractorError('Login failed') urlh = self._request_webpage( f'{self._AUTH_BASE}/auth', None, note='granting user authorization', errnote='user authorization rejected by Rokfin', query={ 'client_id': 'web', 'prompt': 'none', 'redirect_uri': 'https://rokfin.com/silent-check-sso.html', 'response_mode': 'fragment', 'response_type': 'code', 'scope': 'openid', }) self._access_mgmt_tokens = self._download_json( f'{self._AUTH_BASE}/token', None, note='getting access credentials', errnote='error getting access credentials', data=urlencode_postdata({ 'code': urllib.parse.parse_qs(urllib.parse.urldefrag(urlh.url).fragment).get('code')[0], 'client_id': 'web', 'grant_type': 'authorization_code', 'redirect_uri': 'https://rokfin.com/silent-check-sso.html', })) def _authentication_active(self): return not ( {'KEYCLOAK_IDENTITY', 'KEYCLOAK_IDENTITY_LEGACY', 'KEYCLOAK_SESSION', 'KEYCLOAK_SESSION_LEGACY'} - set(self._get_cookies(self._AUTH_BASE))) def _get_auth_token(self): return try_get(self._access_mgmt_tokens, lambda x: ' '.join([x['token_type'], x['access_token']])) def _download_json_using_access_token(self, 
url_or_request, video_id, headers={}, query={}): assert 'authorization' not in headers headers = headers.copy() auth_token = self._get_auth_token() refresh_token = self._access_mgmt_tokens.get('refresh_token') if auth_token: headers['authorization'] = auth_token json_string, urlh = self._download_webpage_handle( url_or_request, video_id, headers=headers, query=query, expected_status=401) if not auth_token or urlh.status != 401 or refresh_token is None: return self._parse_json(json_string, video_id) self._access_mgmt_tokens = self._download_json( f'{self._AUTH_BASE}/token', video_id, note='User authorization expired or canceled by Rokfin. Re-authorizing ...', errnote='Failed to re-authorize', data=urlencode_postdata({ 'grant_type': 'refresh_token', 'refresh_token': refresh_token, 'client_id': 'web', })) headers['authorization'] = self._get_auth_token() if headers['authorization'] is None: raise ExtractorError('User authorization lost', expected=True) return self._download_json(url_or_request, video_id, headers=headers, query=query) class RokfinPlaylistBaseIE(InfoExtractor): _TYPES = { 'video': 'post', 'audio': 'post', 'stream': 'stream', 'dead_stream': 'stream', 'stack': 'stack', } def _get_video_data(self, metadata): for content in metadata.get('content') or []: media_type = self._TYPES.get(content.get('mediaType')) video_id = content.get('id') if media_type == 'post' else content.get('mediaId') if not media_type or not video_id: continue yield self.url_result(f'https://rokfin.com/{media_type}/{video_id}', video_id=f'{media_type}/{video_id}', video_title=str_or_none(traverse_obj(content, ('content', 'contentTitle')))) class RokfinStackIE(RokfinPlaylistBaseIE): IE_NAME = 'rokfin:stack' IE_DESC = 'Rokfin Stacks' _VALID_URL = r'https?://(?:www\.)?rokfin\.com/stack/(?P<id>[^/]+)' _TESTS = [{ 'url': 'https://www.rokfin.com/stack/271/Tulsi-Gabbard-Portsmouth-Townhall-FULL--Feb-9-2020', 'playlist_count': 8, 'info_dict': { 'id': '271', }, }] def _real_extract(self, url): list_id = self._match_id(url) return self.playlist_result(self._get_video_data( self._download_json(f'{_API_BASE_URL}stack/{list_id}', list_id)), list_id) class RokfinChannelIE(RokfinPlaylistBaseIE): IE_NAME = 'rokfin:channel' IE_DESC = 'Rokfin Channels' _VALID_URL = r'https?://(?:www\.)?rokfin\.com/(?!((feed/?)|(discover/?)|(channels/?))$)(?P<id>[^/]+)/?$' _TESTS = [{ 'url': 'https://rokfin.com/TheConvoCouch', 'playlist_mincount': 100, 'info_dict': { 'id': '12071-new', 'title': 'TheConvoCouch - New', 'description': 'md5:bb622b1bca100209b91cd685f7847f06', }, }] _TABS = { 'new': 'posts', 'top': 'top', 'videos': 'video', 'podcasts': 'audio', 'streams': 'stream', 'stacks': 'stack', } def _real_initialize(self): self._validate_extractor_args() def _validate_extractor_args(self): requested_tabs = self._configuration_arg('tab', None) if requested_tabs is not None and (len(requested_tabs) > 1 or requested_tabs[0] not in self._TABS): raise ExtractorError(f'Invalid extractor-arg "tab". 
Must be one of {", ".join(self._TABS)}', expected=True) def _entries(self, channel_id, channel_name, tab): pages_total = None for page_n in itertools.count(0): if tab in ('posts', 'top'): data_url = f'{_API_BASE_URL}user/{channel_name}/{tab}?page={page_n}&size=50' else: data_url = f'{_API_BASE_URL}post/search/{tab}?page={page_n}&size=50&creator={channel_id}' metadata = self._download_json( data_url, channel_name, note=f'Downloading video metadata page {page_n + 1}{format_field(pages_total, None, " of %s")}') yield from self._get_video_data(metadata) pages_total = int_or_none(metadata.get('totalPages')) or None is_last = metadata.get('last') if is_last or (page_n > pages_total if pages_total else is_last is not False): return def _real_extract(self, url): channel_name = self._match_id(url) channel_info = self._download_json(f'{_API_BASE_URL}user/{channel_name}', channel_name) channel_id = channel_info['id'] tab = self._configuration_arg('tab', default=['new'])[0] return self.playlist_result( self._entries(channel_id, channel_name, self._TABS[tab]), f'{channel_id}-{tab}', f'{channel_name} - {tab.title()}', str_or_none(channel_info.get('description'))) class RokfinSearchIE(SearchInfoExtractor): IE_NAME = 'rokfin:search' IE_DESC = 'Rokfin Search' _SEARCH_KEY = 'rkfnsearch' _TYPES = { 'video': (('id', 'raw'), 'post'), 'audio': (('id', 'raw'), 'post'), 'stream': (('content_id', 'raw'), 'stream'), 'dead_stream': (('content_id', 'raw'), 'stream'), 'stack': (('content_id', 'raw'), 'stack'), } _TESTS = [{ 'url': 'rkfnsearch5:"zelenko"', 'playlist_count': 5, 'info_dict': { 'id': '"zelenko"', 'title': '"zelenko"', }, }] _db_url = None _db_access_key = None def _real_initialize(self): self._db_url, self._db_access_key = self.cache.load(self.ie_key(), 'auth', default=(None, None)) if not self._db_url: self._get_db_access_credentials() def _search_results(self, query): total_pages = None for page_number in itertools.count(1): search_results = self._run_search_query( query, data={'query': query, 'page': {'size': 100, 'current': page_number}}, note=f'Downloading page {page_number}{format_field(total_pages, None, " of ~%s")}') total_pages = traverse_obj(search_results, ('meta', 'page', 'total_pages'), expected_type=int_or_none) for result in search_results.get('results') or []: video_id_key, video_type = self._TYPES.get(traverse_obj(result, ('content_type', 'raw')), (None, None)) video_id = traverse_obj(result, video_id_key, expected_type=int_or_none) if video_id and video_type: yield self.url_result(url=f'https://rokfin.com/{video_type}/{video_id}') if not search_results.get('results'): return def _run_search_query(self, video_id, data, **kwargs): data = json.dumps(data).encode() for attempt in range(2): search_results = self._download_json( self._db_url, video_id, data=data, fatal=(attempt == 1), headers={'authorization': self._db_access_key}, **kwargs) if search_results: return search_results self.write_debug('Updating access credentials') self._get_db_access_credentials(video_id) def _get_db_access_credentials(self, video_id=None): auth_data = {'SEARCH_KEY': None, 'ENDPOINT_BASE': None} notfound_err_page = self._download_webpage( 'https://rokfin.com/discover', video_id, expected_status=404, note='Downloading home page') for js_file_path in re.findall(r'<script\b[^>]*\ssrc\s*=\s*"(/static/js/[^">]+)"', notfound_err_page): js_content = self._download_webpage( f'https://rokfin.com{js_file_path}', video_id, note='Downloading JavaScript file', fatal=False) auth_data.update(re.findall( 
rf'REACT_APP_({"|".join(auth_data.keys())})\s*:\s*"([^"]+)"', js_content or '')) if not all(auth_data.values()): continue self._db_url = url_or_none(f'{auth_data["ENDPOINT_BASE"]}/api/as/v1/engines/rokfin-search/search.json') self._db_access_key = f'Bearer {auth_data["SEARCH_KEY"]}' self.cache.store(self.ie_key(), 'auth', (self._db_url, self._db_access_key)) return raise ExtractorError('Unable to extract access credentials') ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/roosterteeth.py��������������������������������������������������0000664�0000000�0000000�00000036614�14675634471�0021774�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, LazyList, int_or_none, join_nonempty, parse_iso8601, parse_qs, smuggle_url, str_or_none, url_or_none, urlencode_postdata, urljoin, ) from ..utils.traversal import traverse_obj class RoosterTeethBaseIE(InfoExtractor): _NETRC_MACHINE = 'roosterteeth' _API_BASE = 'https://svod-be.roosterteeth.com' _API_BASE_URL = f'{_API_BASE}/api/v1' def _perform_login(self, username, password): if self._get_cookies(self._API_BASE_URL).get('rt_access_token'): return try: self._download_json( 'https://auth.roosterteeth.com/oauth/token', None, 'Logging in', data=urlencode_postdata({ 'client_id': '4338d2b4bdc8db1239360f28e72f0d9ddb1fd01e7a38fbb07b4b1f4ba4564cc5', 'grant_type': 'password', 'username': username, 'password': password, })) except ExtractorError as e: msg = 'Unable to login' if isinstance(e.cause, HTTPError) and e.cause.status == 401: resp = self._parse_json(e.cause.response.read().decode(), None, fatal=False) if resp: error = resp.get('extra_info') or resp.get('error_description') or resp.get('error') if error: msg += ': ' + error self.report_warning(msg) def _extract_video_info(self, data): thumbnails = [] for image in traverse_obj(data, ('included', 'images')): if image.get('type') not in ('episode_image', 'bonus_feature_image'): continue thumbnails.extend([{ 'id': name, 'url': url, } for name, url in (image.get('attributes') or {}).items() if url_or_none(url)]) attributes = data.get('attributes') or {} title = traverse_obj(attributes, 'title', 'display_title') sub_only = attributes.get('is_sponsors_only') episode_id = str_or_none(data.get('uuid')) video_id = str_or_none(data.get('id')) if video_id and 'parent_content_id' in attributes: # parent_content_id is a bonus-only key video_id += '-bonus' # there are collisions with bonus ids and regular ids elif not video_id: video_id = episode_id return { 'id': video_id, 'display_id': attributes.get('slug'), 'title': title, 'description': traverse_obj(attributes, 'description', 'caption'), 'series': traverse_obj(attributes, 'show_title', 'parent_content_title'), 
'season_number': int_or_none(attributes.get('season_number')), 'season_id': str_or_none(attributes.get('season_id')), 'episode': title, 'episode_number': int_or_none(attributes.get('number')), 'episode_id': episode_id, 'channel_id': attributes.get('channel_id'), 'duration': int_or_none(attributes.get('length')), 'release_timestamp': parse_iso8601(attributes.get('original_air_date')), 'thumbnails': thumbnails, 'availability': self._availability( needs_premium=sub_only, needs_subscription=sub_only, needs_auth=sub_only, is_private=False, is_unlisted=False), 'tags': attributes.get('genres'), } class RoosterTeethIE(RoosterTeethBaseIE): _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/(?:bonus-feature|episode|watch)/(?P<id>[^/?#&]+)' _TESTS = [{ 'url': 'http://roosterteeth.com/episode/million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'info_dict': { 'id': '9156', 'display_id': 'million-dollars-but-season-2-million-dollars-but-the-game-announcement', 'ext': 'mp4', 'title': 'Million Dollars, But... The Game Announcement', 'description': 'md5:168a54b40e228e79f4ddb141e89fe4f5', 'thumbnail': r're:^https?://.*\.png$', 'series': 'Million Dollars, But...', 'episode': 'Million Dollars, But... The Game Announcement', 'tags': ['Game Show', 'Sketch'], 'season_number': 2, 'availability': 'public', 'episode_number': 10, 'episode_id': '00374575-464e-11e7-a302-065410f210c4', 'season': 'Season 2', 'season_id': 'ffa27d48-464d-11e7-a302-065410f210c4', 'channel_id': '92b6bb21-91d2-4b1b-bf95-3268fa0d9939', 'duration': 145, 'release_timestamp': 1462982400, 'release_date': '20160511', }, 'params': {'skip_download': True}, }, { 'url': 'https://roosterteeth.com/watch/rwby-bonus-25', 'info_dict': { 'id': '40432', 'display_id': 'rwby-bonus-25', 'title': 'Grimm', 'description': 'md5:f30ff570741213418a8d2c19868b93ab', 'episode': 'Grimm', 'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1', 'thumbnail': r're:^https?://.*\.(png|jpe?g)$', 'ext': 'mp4', 'availability': 'public', 'episode_id': 'f8117b13-f068-499e-803e-eec9ea2dec8c', 'episode_number': 3, 'tags': ['Animation'], 'season_id': '4b8f0a9e-12c4-41ed-8caa-fed15a85bab8', 'season': 'Season 1', 'series': 'RWBY: World of Remnant', 'season_number': 1, 'duration': 216, 'release_timestamp': 1413489600, 'release_date': '20141016', }, 'params': {'skip_download': True}, }, { # bonus feature with /watch/ url 'url': 'https://roosterteeth.com/watch/rwby-bonus-21', 'info_dict': { 'id': '33-bonus', 'display_id': 'rwby-bonus-21', 'title': 'Volume 5 Yang Character Short', 'description': 'md5:8c2440bc763ea90c52cfe0a68093e1f7', 'episode': 'Volume 5 Yang Character Short', 'channel_id': '92f780eb-ebfe-4bf5-a3b5-c6ad5460a5f1', 'thumbnail': r're:^https?://.*\.(png|jpe?g)$', 'ext': 'mp4', 'availability': 'public', 'episode_id': 'f2a9f132-1fe2-44ad-8956-63d7c0267720', 'episode_number': 55, 'series': 'RWBY', 'duration': 255, 'release_timestamp': 1507993200, 'release_date': '20171014', }, 'params': {'skip_download': True}, }, { # only works with video_data['attributes']['url'] m3u8 url 'url': 'https://www.roosterteeth.com/watch/achievement-hunter-achievement-hunter-fatality-walkthrough-deathstroke-lex-luthor-captain-marvel-green-lantern-and-wonder-woman', 'info_dict': { 'id': '25394', 'ext': 'mp4', 'title': 'Fatality Walkthrough: Deathstroke, Lex Luthor, Captain Marvel, Green Lantern, and Wonder Woman', 'description': 'md5:91bb934698344fb9647b1c7351f16964', 'availability': 'public', 'thumbnail': r're:^https?://.*\.(png|jpe?g)$', 'episode': 'Fatality Walkthrough: 
            'episode_number': 71,
            'episode_id': 'ffaec998-464d-11e7-a302-065410f210c4',
            'season': 'Season 2008',
            'tags': ['Gaming'],
            'series': 'Achievement Hunter',
            'display_id': 'md5:4465ce4f001735f9d7a2ae529a543d31',
            'season_id': 'ffa13340-464d-11e7-a302-065410f210c4',
            'season_number': 2008,
            'channel_id': '2cb2a70c-be50-46f5-93d7-84a1baabb4f7',
            'duration': 189,
            'release_timestamp': 1228317300,
            'release_date': '20081203',
        },
        'params': {'skip_download': True},
    }, {
        # brightcove fallback extraction needed
        'url': 'https://roosterteeth.com/watch/lets-play-2013-126',
        'info_dict': {
            'id': '17845',
            'ext': 'mp4',
            'title': 'WWE \'13',
            'availability': 'public',
            'series': 'Let\'s Play',
            'episode_number': 10,
            'season_id': 'ffa23d9c-464d-11e7-a302-065410f210c4',
            'channel_id': '75ba87e8-06fd-4482-bad9-52a4da2c6181',
            'episode': 'WWE \'13',
            'episode_id': 'ffdbe55e-464d-11e7-a302-065410f210c4',
            'thumbnail': r're:^https?://.*\.(png|jpe?g)$',
            'tags': ['Gaming', 'Our Favorites'],
            'description': 'md5:b4a5226d2bbcf0dafbde11a2ba27262d',
            'display_id': 'lets-play-2013-126',
            'season_number': 3,
            'season': 'Season 3',
            'release_timestamp': 1359999840,
            'release_date': '20130204',
        },
        'expected_warnings': ['Direct m3u8 URL returned HTTP Error 403'],
        'params': {'skip_download': True},
    }, {
        'url': 'http://achievementhunter.roosterteeth.com/episode/off-topic-the-achievement-hunter-podcast-2016-i-didn-t-think-it-would-pass-31',
        'only_matching': True,
    }, {
        'url': 'http://funhaus.roosterteeth.com/episode/funhaus-shorts-2016-austin-sucks-funhaus-shorts',
        'only_matching': True,
    }, {
        'url': 'http://screwattack.roosterteeth.com/episode/death-battle-season-3-mewtwo-vs-shadow',
        'only_matching': True,
    }, {
        'url': 'http://theknow.roosterteeth.com/episode/the-know-game-news-season-1-boring-steam-sales-are-better',
        'only_matching': True,
    }, {
        # only available for FIRST members
        'url': 'http://roosterteeth.com/episode/rt-docs-the-world-s-greatest-head-massage-the-world-s-greatest-head-massage-an-asmr-journey-part-one',
        'only_matching': True,
    }, {
        'url': 'https://roosterteeth.com/watch/million-dollars-but-season-2-million-dollars-but-the-game-announcement',
        'only_matching': True,
    }, {
        'url': 'https://roosterteeth.com/bonus-feature/camp-camp-soundtrack-another-rap-song-about-foreign-cars-richie-branson',
        'only_matching': True,
    }]
    _BRIGHTCOVE_ACCOUNT_ID = '6203312018001'

    def _extract_brightcove_formats_and_subtitles(self, bc_id, url, m3u8_url):
        account_id = self._search_regex(
            r'/accounts/(\d+)/videos/', m3u8_url, 'account id', default=self._BRIGHTCOVE_ACCOUNT_ID)
        info = self._downloader.get_info_extractor('BrightcoveNew').extract(smuggle_url(
            f'https://players.brightcove.net/{account_id}/default_default/index.html?videoId={bc_id}',
            {'referrer': url}))
        return info['formats'], info['subtitles']

    def _real_extract(self, url):
        display_id = self._match_id(url)
        api_episode_url = f'{self._API_BASE_URL}/watch/{display_id}'

        try:
            video_data = self._download_json(
                api_episode_url + '/videos', display_id, 'Downloading video JSON metadata',
                headers={'Client-Type': 'web'})['data'][0]  # web client-type yields ad-free streams
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 403:
                if self._parse_json(e.cause.response.read().decode(), display_id).get('access') is False:
                    self.raise_login_required(
                        f'{display_id} is only available for FIRST members')
            raise

        # XXX: additional ad-free URL at video_data['links']['download'] but often gives 403 errors
        m3u8_url = video_data['attributes']['url']
        is_brightcove = traverse_obj(video_data, ('attributes', 'encoding_pipeline')) == 'brightcove'
        bc_id = traverse_obj(video_data, ('attributes', 'uid', {str}))

        try:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                m3u8_url, display_id, 'mp4', 'm3u8_native', m3u8_id='hls')
        except ExtractorError as e:
            if is_brightcove and bc_id and isinstance(e.cause, HTTPError) and e.cause.status == 403:
                self.report_warning(
                    'Direct m3u8 URL returned HTTP Error 403; retrying with Brightcove extraction')
                formats, subtitles = self._extract_brightcove_formats_and_subtitles(bc_id, url, m3u8_url)
            else:
                raise

        episode = self._download_json(
            api_episode_url, display_id,
            'Downloading episode JSON metadata')['data'][0]

        return {
            'display_id': display_id,
            'formats': formats,
            'subtitles': subtitles,
            **self._extract_video_info(episode),
        }


class RoosterTeethSeriesIE(RoosterTeethBaseIE):
    _VALID_URL = r'https?://(?:.+?\.)?roosterteeth\.com/series/(?P<id>[^/?#&]+)'
    _TESTS = [{
        'url': 'https://roosterteeth.com/series/rwby?season=7',
        'playlist_count': 13,
        'info_dict': {
            'id': 'rwby-7',
            'title': 'RWBY - Season 7',
        },
    }, {
        'url': 'https://roosterteeth.com/series/the-weird-place',
        'playlist_count': 7,
        'info_dict': {
            'id': 'the-weird-place',
            'title': 'The Weird Place',
        },
    }, {
        'url': 'https://roosterteeth.com/series/role-initiative',
        'playlist_mincount': 16,
        'info_dict': {
            'id': 'role-initiative',
            'title': 'Role Initiative',
        },
    }, {
        'url': 'https://roosterteeth.com/series/let-s-play-minecraft?season=9',
        'playlist_mincount': 50,
        'info_dict': {
            'id': 'let-s-play-minecraft-9',
            'title': 'Let\'s Play Minecraft - Season 9',
        },
    }]

    def _entries(self, series_id, season_number):
        display_id = join_nonempty(series_id, season_number)

        def yield_episodes(data):
            for episode in traverse_obj(data, ('data', lambda _, v: v['canonical_links']['self'])):
                yield self.url_result(
                    urljoin('https://www.roosterteeth.com', episode['canonical_links']['self']),
                    RoosterTeethIE, **self._extract_video_info(episode))

        series_data = self._download_json(
            f'{self._API_BASE_URL}/shows/{series_id}/seasons?order=asc&order_by', display_id)
        for season_data in traverse_obj(series_data, ('data', lambda _, v: v['links']['episodes'])):
            idx = traverse_obj(season_data, ('attributes', 'number'))
            if season_number is not None and idx != season_number:
                continue
            yield from yield_episodes(self._download_json(
                urljoin(self._API_BASE, season_data['links']['episodes']), display_id,
                f'Downloading season {idx} JSON metadata', query={'per_page': 1000}))

        if season_number is None:  # extract series-level bonus features
            yield from yield_episodes(self._download_json(
                f'{self._API_BASE_URL}/shows/{series_id}/bonus_features?order=asc&order_by&per_page=1000',
                display_id, 'Downloading bonus features JSON metadata', fatal=False))

    def _real_extract(self, url):
        series_id = self._match_id(url)
        season_number = traverse_obj(parse_qs(url), ('season', 0), expected_type=int_or_none)

        entries = LazyList(self._entries(series_id, season_number))
        return self.playlist_result(
            entries,
            join_nonempty(series_id, season_number),
            join_nonempty(entries[0].get('series'), season_number, delim=' - Season '))
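
# Usage sketch (illustrative addition, not part of upstream): exercising the
# extractor above through the public YoutubeDL API. The URL is one of the
# _TESTS entries; the options simply mirror its 'params'.
if __name__ == '__main__':
    from yt_dlp import YoutubeDL

    with YoutubeDL({'skip_download': True}) as ydl:
        # _real_extract() requests ad-free HLS with the 'web' Client-Type and
        # falls back to Brightcove when the direct m3u8 URL returns HTTP 403.
        info = ydl.extract_info(
            'https://roosterteeth.com/watch/rwby-bonus-25', download=False)
        print(info['id'], info['title'], info.get('availability'))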
# ---- yt-dlp-2024.09.27/yt_dlp/extractor/rottentomatoes.py ----
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    clean_html,
    float_or_none,
    get_element_by_class,
    join_nonempty,
    traverse_obj,
    url_or_none,
)


class RottenTomatoesIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?rottentomatoes\.com/m/(?P<playlist>[^/]+)(?:/(?P<tr>trailers)(?:/(?P<id>\w+))?)?'
    _TESTS = [{
        'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers/11028566/',
        'info_dict': {
            'id': '11028566',
            'ext': 'mp4',
            'title': 'Toy Story 3',
            'description': 'From the creators of the beloved TOY STORY films, comes a story that will reunite the gang in a whole new way.',
        },
        'skip': 'No longer available',
    }, {
        'url': 'https://www.rottentomatoes.com/m/toy_story_3/trailers/VycaVoBKhGuk',
        'info_dict': {
            'id': 'VycaVoBKhGuk',
            'ext': 'mp4',
            'title': 'Toy Story 3: Trailer 2',
            'description': '',
            'thumbnail': r're:^https?://.*\.jpg$',
            'duration': 149.941,
        },
    }, {
        'url': 'http://www.rottentomatoes.com/m/toy_story_3',
        'info_dict': {
            'id': 'toy_story_3',
            'title': 'Toy Story 3',
        },
        'playlist_mincount': 4,
    }, {
        'url': 'http://www.rottentomatoes.com/m/toy_story_3/trailers',
        'info_dict': {
            'id': 'toy_story_3-trailers',
        },
        'playlist_mincount': 5,
    }]

    def _extract_videos(self, data, display_id):
        for video in traverse_obj(data, (lambda _, v: v['publicId'] and v['file'] and v['type'] == 'hls')):
            yield {
                'formats': self._extract_m3u8_formats(
                    video['file'], display_id, 'mp4', m3u8_id='hls', fatal=False),
                **traverse_obj(video, {
                    'id': 'publicId',
                    'title': 'title',
                    'description': 'description',
                    'duration': ('durationInSeconds', {float_or_none}),
                    'thumbnail': ('image', {url_or_none}),
                }),
            }

    def _real_extract(self, url):
        playlist_id, trailers, video_id = self._match_valid_url(url).group('playlist', 'tr', 'id')
        playlist_id = join_nonempty(playlist_id, trailers)
        webpage = self._download_webpage(url, playlist_id)
        data = self._search_json(
            r'<script[^>]+\bid=["\'](?:heroV|v)ideos["\'][^>]*>', webpage,
            'data', playlist_id, contains_pattern=r'\[{(?s:.+)}\]')

        if video_id:
            video_data = traverse_obj(data, lambda _, v: v['publicId'] == video_id)
            if not video_data:
                raise ExtractorError('Unable to extract video from webpage')
            return next(self._extract_videos(video_data, video_id))

        return self.playlist_result(
            self._extract_videos(data, playlist_id), playlist_id,
            clean_html(get_element_by_class('scoreboard__title', webpage)))


# ---- yt-dlp-2024.09.27/yt_dlp/extractor/rozhlas.py ----
import itertools

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    extract_attributes,
    int_or_none,
    remove_start,
    str_or_none,
    traverse_obj,
    unified_timestamp,
    url_or_none,
)


class RozhlasIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?prehravac\.rozhlas\.cz/audio/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'http://prehravac.rozhlas.cz/audio/3421320',
        'md5': '504c902dbc9e9a1fd50326eccf02a7e2',
        'info_dict': {
            'id': '3421320',
            'ext': 'mp3',
            'title': 'Echo Pavla Klusáka (30.06.2015 21:00)',
            'description': 'Osmdesátiny Terryho Rileyho jsou skvělou příležitostí proletět se elektronickými i akustickými díly zakladatatele minimalismu, který je aktivní už přes padesát let',
        },
    }, {
        'url': 'http://prehravac.rozhlas.cz/audio/3421320/embed',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        audio_id = self._match_id(url)

        webpage = self._download_webpage(
            f'http://prehravac.rozhlas.cz/audio/{audio_id}', audio_id)

        title = self._html_search_regex(
            r'<h3>(.+?)</h3>\s*<p[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track',
            webpage, 'title', default=None) or remove_start(
            self._og_search_title(webpage), 'Radio Wave - ')
        description = self._html_search_regex(
            r'<p[^>]+title=(["\'])(?P<url>(?:(?!\1).)+)\1[^>]*>.*?</p>\s*<div[^>]+id=["\']player-track',
            webpage, 'description', fatal=False, group='url')
        duration = int_or_none(self._search_regex(
            r'data-duration=["\'](\d+)', webpage, 'duration', default=None))

        return {
            'id': audio_id,
            'url': f'http://media.rozhlas.cz/_audio/{audio_id}.mp3',
            'title': title,
            'description': description,
            'duration': duration,
            'vcodec': 'none',
        }


class RozhlasBaseIE(InfoExtractor):
    def _extract_formats(self, entry, audio_id):
        formats = []
        for audio in traverse_obj(entry, ('audioLinks', lambda _, v: url_or_none(v['url']))):
            ext = audio.get('variant')
            for retry in self.RetryManager():
                if retry.attempt > 1:
                    self._sleep(1, audio_id)
                try:
                    if ext == 'dash':
                        formats.extend(self._extract_mpd_formats(
                            audio['url'], audio_id, mpd_id=ext))
                    elif ext == 'hls':
                        formats.extend(self._extract_m3u8_formats(
                            audio['url'], audio_id, 'm4a', m3u8_id=ext))
                    else:
                        formats.append({
                            'url': audio['url'],
                            'ext': ext,
                            'format_id': ext,
                            'abr': int_or_none(audio.get('bitrate')),
                            'acodec': ext,
                            'vcodec': 'none',
                        })
                except ExtractorError as e:
                    if isinstance(e.cause, HTTPError) and e.cause.status == 429:
                        retry.error = e.cause
                    else:
                        self.report_warning(e.msg)

        return formats


class RozhlasVltavaIE(RozhlasBaseIE):
    _VALID_URL = r'https?://(?:\w+\.rozhlas|english\.radio)\.cz/[\w-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://wave.rozhlas.cz/papej-masicko-porcujeme-a-bilancujeme-filmy-a-serialy-ktere-letos-zabily-8891337',
        'md5': 'ba2fdbc1242fc16771c7695d271ec355',
        'info_dict': {
            'id': '8891337',
            'title': 'md5:21f99739d04ab49d8c189ec711eef4ec',
        },
        'playlist_count': 1,
        'playlist': [{
            'md5': 'ba2fdbc1242fc16771c7695d271ec355',
            'info_dict': {
                'id': '10520988',
                'ext': 'mp3',
                'title': 'Papej masíčko! Porcujeme a bilancujeme filmy a seriály, které to letos zabily',
                'description': 'md5:1c6d29fb9564e1f17fc1bb83ae7da0bc',
                'duration': 1574,
                'artist': 'Aleš Stuchlý',
                'channel_id': 'radio-wave',
            },
        }],
    }, {
        'url': 'https://wave.rozhlas.cz/poslechnete-si-neklid-podcastovy-thriller-o-vine-strachu-a-vztahu-ktery-zasel-8554744',
        'info_dict': {
            'id': '8554744',
            'title': 'Poslechněte si Neklid. Podcastový thriller o vině, strachu a vztahu, který zašel příliš daleko',
        },
        'playlist_count': 5,
        'playlist': [{
            'md5': '93d4109cf8f40523699ae9c1d4600bdd',
            'info_dict': {
                'id': '9890713',
                'ext': 'mp3',
                'title': 'Neklid #1',
                'description': '1. díl: Neklid: 1. díl',
                'duration': 1025,
                'artist': 'Josef Kokta',
                'channel_id': 'radio-wave',
                'chapter': 'Neklid #1',
                'chapter_number': 1,
            },
        }, {
            'md5': 'e9763235be4a6dcf94bc8a5bac1ca126',
            'info_dict': {
                'id': '9890716',
                'ext': 'mp3',
                'title': 'Neklid #2',
                'description': '2. díl: Neklid: 2. díl',
                'duration': 768,
                'artist': 'Josef Kokta',
                'channel_id': 'radio-wave',
                'chapter': 'Neklid #2',
                'chapter_number': 2,
            },
        }, {
            'md5': '00b642ea94b78cc949ac84da09f87895',
            'info_dict': {
                'id': '9890722',
                'ext': 'mp3',
                'title': 'Neklid #3',
                'description': '3. díl: Neklid: 3. díl',
                'duration': 607,
                'artist': 'Josef Kokta',
                'channel_id': 'radio-wave',
                'chapter': 'Neklid #3',
                'chapter_number': 3,
            },
        }, {
            'md5': 'faef97b1b49da7df874740f118c19dea',
            'info_dict': {
                'id': '9890728',
                'ext': 'mp3',
                'title': 'Neklid #4',
                'description': '4. díl: Neklid: 4. díl',
                'duration': 621,
                'artist': 'Josef Kokta',
                'channel_id': 'radio-wave',
                'chapter': 'Neklid #4',
                'chapter_number': 4,
            },
        }, {
            'md5': '6e729fa39b647325b868d419c76f3efa',
            'info_dict': {
                'id': '9890734',
                'ext': 'mp3',
                'title': 'Neklid #5',
                'description': '5. díl: Neklid: 5. díl',
                'duration': 908,
                'artist': 'Josef Kokta',
                'channel_id': 'radio-wave',
                'chapter': 'Neklid #5',
                'chapter_number': 5,
            },
        }],
    }, {
        'url': 'https://dvojka.rozhlas.cz/karel-siktanc-cerny-jezdec-bily-kun-napinava-pohadka-o-tajemnem-prizraku-8946969',
        'info_dict': {
            'id': '8946969',
            'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '10631121',
                'ext': 'm4a',
                'title': 'Karel Šiktanc: Černý jezdec, bílý kůň. Napínavá pohádka o tajemném přízraku',
                'description': 'Karel Šiktanc: Černý jezdec, bílý kůň',
                'duration': 2656,
                'artist': 'Tvůrčí skupina Drama a literatura',
                'channel_id': 'dvojka',
            },
        }],
        'params': {'skip_download': 'dash'},
    }]

    def _extract_video(self, entry):
        audio_id = entry['meta']['ga']['contentId']
        chapter_number = traverse_obj(entry, ('meta', 'ga', 'contentSerialPart', {int_or_none}))

        return {
            'id': audio_id,
            'chapter': traverse_obj(entry, ('meta', 'ga', 'contentNameShort')) if chapter_number else None,
            'chapter_number': chapter_number,
            'formats': self._extract_formats(entry, audio_id),
            **traverse_obj(entry, {
                'title': ('meta', 'ga', 'contentName'),
                'description': 'title',
                'duration': ('duration', {int_or_none}),
                'artist': ('meta', 'ga', 'contentAuthor'),
                'channel_id': ('meta', 'ga', 'contentCreator'),
            }),
        }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # FIXME: Use get_element_text_and_html_by_tag when it accepts less strict html
        data = self._parse_json(extract_attributes(self._search_regex(
            r'(<div class="mujRozhlasPlayer" data-player=\'[^\']+\'>)',
            webpage, 'player'))['data-player'], video_id)['data']

        return {
            '_type': 'playlist',
            'id': str_or_none(data.get('embedId')) or video_id,
            'title': traverse_obj(data, ('series', 'title')),
            'entries': map(self._extract_video, data['playlist']),
        }


class MujRozhlasIE(RozhlasBaseIE):
    _VALID_URL = r'https?://(?:www\.)?mujrozhlas\.cz/(?:[^/]+/)*(?P<id>[^/?#&]+)'
    _TESTS = [{
        # single episode extraction
        'url': 'https://www.mujrozhlas.cz/vykopavky/ach-jo-zase-teleci-rizek-je-mnohem-min-cesky-nez-jsme-si-mysleli',
        'md5': '6f8fd68663e64936623e67c152a669e0',
        'info_dict': {
            'id': '10787730',
            'ext': 'mp3',
            'title': 'Ach jo, zase to telecí! Řízek je mnohem míň český, než jsme si mysleli',
            'description': 'md5:db7141e9caaedc9041ec7cefb9a62908',
            'timestamp': 1684915200,
            'modified_timestamp': 1687550432,
            'series': 'Vykopávky',
            'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/84377046610af6ddc54d910b1dd7a22b.jpg',
            'channel_id': 'radio-wave',
            'upload_date': '20230524',
            'modified_date': '20230623',
        },
    }, {
        # serial extraction
        'url': 'https://www.mujrozhlas.cz/radiokniha/jaroslava-janackova-pribeh-tajemneho-psani-o-pramenech-genezi-babicky',
        'playlist_mincount': 7,
        'info_dict': {
            'id': 'bb2b5f4e-ffb4-35a6-a34a-046aa62d6f6b',
            'title': 'Jaroslava Janáčková: Příběh tajemného psaní. O pramenech a genezi Babičky',
            'description': 'md5:7434d8fac39ac9fee6df098e11dfb1be',
        },
    }, {
        # show extraction
        'url': 'https://www.mujrozhlas.cz/nespavci',
        'playlist_mincount': 14,
        'info_dict': {
            'id': '09db9b37-d0f4-368c-986a-d3439f741f08',
            'title': 'Nespavci',
            'description': 'md5:c430adcbf9e2b9eac88b745881e814dc',
        },
    }, {
        # serialPart
        'url': 'https://www.mujrozhlas.cz/povidka/gustavo-adolfo-becquer-hora-duchu',
        'info_dict': {
            'id': '8889035',
            'ext': 'm4a',
            'title': 'Gustavo Adolfo Bécquer: Hora duchů',
            'description': 'md5:343a15257b376c276e210b78e900ffea',
            'chapter': 'Hora duchů a Polibek – dva tajemné příběhy Gustava Adolfa Bécquera',
            'thumbnail': 'https://portal.rozhlas.cz/sites/default/files/images/2adfe1387fb140634be725c1ccf26214.jpg',
            'timestamp': 1708173000,
            'episode': 'Episode 1',
            'episode_number': 1,
            'series': 'Povídka',
            'modified_date': '20240217',
            'upload_date': '20240217',
            'modified_timestamp': 1708173198,
            'channel_id': 'vltava',
        },
        'params': {'skip_download': 'dash'},
    }]

    def _call_api(self, path, item_id, msg='API JSON'):
        return self._download_json(
            f'https://api.mujrozhlas.cz/{path}/{item_id}', item_id,
            note=f'Downloading {msg}', errnote=f'Failed to download {msg}')['data']

    def _extract_audio_entry(self, entry):
        audio_id = entry['meta']['ga']['contentId']

        return {
            'id': audio_id,
            'formats': self._extract_formats(entry['attributes'], audio_id),
            **traverse_obj(entry, {
                'title': ('attributes', 'title'),
                'description': ('attributes', 'description'),
                'episode_number': ('attributes', 'part'),
                'series': ('attributes', 'mirroredShow', 'title'),
                'chapter': ('attributes', 'mirroredSerial', 'title'),
                'artist': ('meta', 'ga', 'contentAuthor'),
                'channel_id': ('meta', 'ga', 'contentCreator'),
                'timestamp': ('attributes', 'since', {unified_timestamp}),
                'modified_timestamp': ('attributes', 'updated', {unified_timestamp}),
                'thumbnail': ('attributes', 'asset', 'url', {url_or_none}),
            }),
        }

    def _entries(self, api_url, playlist_id):
        for page in itertools.count(1):
            episodes = self._download_json(
                api_url, playlist_id, note=f'Downloading episodes page {page}',
                errnote=f'Failed to download episodes page {page}', fatal=False)
            for episode in traverse_obj(episodes, ('data', lambda _, v: v['meta']['ga']['contentId'])):
                yield self._extract_audio_entry(episode)
            api_url = traverse_obj(episodes, ('links', 'next', {url_or_none}))
            if not api_url:
                break

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        info = self._search_json(r'\bvar\s+dl\s*=', webpage, 'info json', display_id)

        entity = info['siteEntityBundle']
        if entity in ('episode', 'serialPart'):
            return self._extract_audio_entry(self._call_api(
                'episodes', info['contentId'], 'episode info API JSON'))
        elif entity in ('show', 'serial'):
            playlist_id = info['contentShow'].split(':')[0] if entity == 'show' else info['contentId']
            data = self._call_api(f'{entity}s', playlist_id, f'{entity} playlist JSON')
            api_url = data['relationships']['episodes']['links']['related']
            return self.playlist_result(
                self._entries(api_url, playlist_id), playlist_id,
                **traverse_obj(data, ('attributes', {
                    'title': 'title',
                    'description': 'description',
                })))
        else:  # `entity == 'person'` not implemented yet by API, ref:
            # https://api.mujrozhlas.cz/persons/8367e456-2a57-379a-91bb-e699619bea49/participation
            raise ExtractorError(f'Unsupported entity type "{entity}"')
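
# Usage sketch (illustrative addition, not part of upstream): the pagination
# contract that _entries() above relies on - each page of the mujRozhlas API
# carries a `links.next` URL that is followed until absent. The show UUID is
# taken from the _TESTS entry for https://www.mujrozhlas.cz/nespavci.
if __name__ == '__main__':
    import json
    import urllib.request

    show = json.load(urllib.request.urlopen(
        'https://api.mujrozhlas.cz/shows/09db9b37-d0f4-368c-986a-d3439f741f08'))['data']
    # _real_extract() reads the episode listing URL from the show's relationships
    api_url = show['relationships']['episodes']['links']['related']
    while api_url:
        page = json.load(urllib.request.urlopen(api_url))
        for episode in page.get('data', []):
            print(episode['meta']['ga']['contentId'], episode['attributes']['title'])
        api_url = (page.get('links') or {}).get('next')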
# ---- yt-dlp-2024.09.27/yt_dlp/extractor/rte.py ----
import re

from .common import InfoExtractor
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    float_or_none,
    parse_iso8601,
    str_or_none,
    try_get,
    unescapeHTML,
    url_or_none,
)


class RteBaseIE(InfoExtractor):
    def _real_extract(self, url):
        item_id = self._match_id(url)

        info_dict = {}
        formats = []

        ENDPOINTS = (
            'https://feeds.rasset.ie/rteavgen/player/playlist?type=iptv&format=json&showId=',
            'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=',
        )

        for num, ep_url in enumerate(ENDPOINTS, start=1):
            try:
                data = self._download_json(ep_url + item_id, item_id)
            except ExtractorError as ee:
                if num < len(ENDPOINTS) or formats:
                    continue
                if isinstance(ee.cause, HTTPError) and ee.cause.status == 404:
                    error_info = self._parse_json(ee.cause.response.read().decode(), item_id, fatal=False)
                    if error_info:
                        raise ExtractorError(
                            '{} said: {}'.format(self.IE_NAME, error_info['message']),
                            expected=True)
                raise

            # NB the string values in the JSON are stored using XML escaping(!)
            show = try_get(data, lambda x: x['shows'][0], dict)
            if not show:
                continue

            if not info_dict:
                title = unescapeHTML(show['title'])
                description = unescapeHTML(show.get('description'))
                thumbnail = show.get('thumbnail')
                duration = float_or_none(show.get('duration'), 1000)
                timestamp = parse_iso8601(show.get('published'))
                info_dict = {
                    'id': item_id,
                    'title': title,
                    'description': description,
                    'thumbnail': thumbnail,
                    'timestamp': timestamp,
                    'duration': duration,
                }

            mg = try_get(show, lambda x: x['media:group'][0], dict)
            if not mg:
                continue

            if mg.get('url'):
                m = re.match(r'(?P<url>rtmpe?://[^/]+)/(?P<app>.+)/(?P<playpath>mp4:.*)', mg['url'])
                if m:
                    m = m.groupdict()
                    formats.append({
                        'url': m['url'] + '/' + m['app'],
                        'app': m['app'],
                        'play_path': m['playpath'],
                        'player_url': url,
                        'ext': 'flv',
                        'format_id': 'rtmp',
                    })

            if mg.get('hls_server') and mg.get('hls_url'):
                formats.extend(self._extract_m3u8_formats(
                    mg['hls_server'] + mg['hls_url'], item_id, 'mp4',
                    entry_protocol='m3u8_native', m3u8_id='hls', fatal=False))

            if mg.get('hds_server') and mg.get('hds_url'):
                formats.extend(self._extract_f4m_formats(
                    mg['hds_server'] + mg['hds_url'], item_id,
                    f4m_id='hds', fatal=False))

            mg_rte_server = str_or_none(mg.get('rte:server'))
            mg_url = str_or_none(mg.get('url'))
            if mg_rte_server and mg_url:
                hds_url = url_or_none(mg_rte_server + mg_url)
                if hds_url:
                    formats.extend(self._extract_f4m_formats(
                        hds_url, item_id, f4m_id='hds', fatal=False))

        info_dict['formats'] = formats
        return info_dict


class RteIE(RteBaseIE):
    IE_NAME = 'rte'
    IE_DESC = 'Raidió Teilifís Éireann TV'
    _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
        'md5': '4a76eb3396d98f697e6e8110563d2604',
        'info_dict': {
            'id': '10478715',
            'ext': 'mp4',
            'title': 'iWitness',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'The spirit of Ireland, one voice and one minute at a time.',
            'duration': 60.046,
            'upload_date': '20151012',
            'timestamp': 1444694160,
        },
    }


class RteRadioIE(RteBaseIE):
    IE_NAME = 'rte:radio'
    IE_DESC = 'Raidió Teilifís Éireann radio'
    # Radioplayer URLs have two distinct specifier formats,
    # the old format #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
    # the new format #!rii=b<channel_id>_<id>_<playable_item_id>_<date>_
    # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
    # An <id> uniquely defines an individual recording, and is the only part we require.
    _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:b?[0-9]*)(?:%3A|:|%5F|_)(?P<id>[0-9]+)'

    _TESTS = [{
        # Old-style player URL; HLS and RTMPE formats
        'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
        'md5': 'c79ccb2c195998440065456b69760411',
        'info_dict': {
            'id': '10507902',
            'ext': 'mp4',
            'title': 'Gloria',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'md5:9ce124a7fb41559ec68f06387cabddf0',
            'timestamp': 1451203200,
            'upload_date': '20151227',
            'duration': 7230.0,
        },
    }, {
        # New-style player URL; RTMPE formats only
        'url': 'http://rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=b16_3250678_8861_06-04-2012_',
        'info_dict': {
            'id': '3250678',
            'ext': 'flv',
            'title': 'The Lyric Concert with Paul Herriott',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': '',
            'timestamp': 1333742400,
            'upload_date': '20120406',
            'duration': 7199.016,
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
    }]


# ---- yt-dlp-2024.09.27/yt_dlp/extractor/rtl2.py ----
import re

from .common import InfoExtractor
from ..utils import int_or_none


class RTL2IE(InfoExtractor):
    IE_NAME = 'rtl2'
    _VALID_URL = r'https?://(?:www\.)?rtl2\.de/sendung/[^/]+/(?:video/(?P<vico_id>\d+)[^/]+/(?P<vivi_id>\d+)-|folge/)(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'http://www.rtl2.de/sendung/grip-das-motormagazin/folge/folge-203-0',
        'info_dict': {
            'id': 'folge-203-0',
            'ext': 'f4v',
            'title': 'GRIP sucht den Sommerkönig',
            'description': 'md5:e3adbb940fd3c6e76fa341b8748b562f',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
    }, {
        'url': 'http://www.rtl2.de/sendung/koeln-50667/video/5512-anna/21040-anna-erwischt-alex/',
        'info_dict': {
            'id': 'anna-erwischt-alex',
            'ext': 'mp4',
            'title': 'Anna erwischt Alex!',
            'description': 'Anna nimmt ihrem Vater nicht ab, dass er nicht spielt. Und tatsächlich erwischt sie ihn auf frischer Tat.',
        },
        'params': {
            # rtmp download
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
    }]

    def _real_extract(self, url):
        vico_id, vivi_id, display_id = self._match_valid_url(url).groups()
        if not vico_id:
            webpage = self._download_webpage(url, display_id)

            mobj = re.search(
                r'data-collection="(?P<vico_id>\d+)"[^>]+data-video="(?P<vivi_id>\d+)"',
                webpage)
            if mobj:
                vico_id = mobj.group('vico_id')
                vivi_id = mobj.group('vivi_id')
            else:
                vico_id = self._html_search_regex(
                    r'vico_id\s*:\s*([0-9]+)', webpage, 'vico_id')
                vivi_id = self._html_search_regex(
                    r'vivi_id\s*:\s*([0-9]+)', webpage, 'vivi_id')

        info = self._download_json(
            'https://service.rtl2.de/api-player-vipo/video.php', display_id,
            query={
                'vico_id': vico_id,
                'vivi_id': vivi_id,
            })
        video_info = info['video']
        title = video_info['titel']

        formats = []

        rtmp_url = video_info.get('streamurl')
        if rtmp_url:
            rtmp_url = rtmp_url.replace('\\', '')
            stream_url = 'mp4:' + self._html_search_regex(r'/ondemand/(.+)', rtmp_url, 'stream URL')
            rtmp_conn = ['S:connect', 'O:1', 'NS:pageUrl:' + url, 'NB:fpad:0', 'NN:videoFunction:1', 'O:0']

            formats.append({
                'format_id': 'rtmp',
                'url': rtmp_url,
                'play_path': stream_url,
                'player_url': 'https://www.rtl2.de/sites/default/modules/rtl2/jwplayer/jwplayer-7.6.0/jwplayer.flash.swf',
                'page_url': url,
                'flash_version': 'LNX 11,2,202,429',
                'rtmp_conn': rtmp_conn,
                'no_resume': True,
                'quality': 1,
            })

        m3u8_url = video_info.get('streamurl_hls')
        if m3u8_url:
            formats.extend(self._extract_akamai_formats(m3u8_url, display_id))

        return {
            'id': display_id,
            'title': title,
            'thumbnail': video_info.get('image'),
            'description': video_info.get('beschreibung'),
            'duration': int_or_none(video_info.get('duration')),
            'formats': formats,
        }


# ---- yt-dlp-2024.09.27/yt_dlp/extractor/rtlnl.py ----
from .common import InfoExtractor
from ..utils import (
    int_or_none,
    parse_duration,
)


class RtlNlIE(InfoExtractor):
    IE_NAME = 'rtl.nl'
    IE_DESC = 'rtl.nl and rtlxl.nl'
    _EMBED_REGEX = [r'<iframe[^>]+?\bsrc=(?P<q1>[\'"])(?P<url>(?:https?:)?//(?:(?:www|static)\.)?rtl\.nl/(?:system/videoplayer/[^"]+(?:video_)?)?embed[^"]+)(?P=q1)']
    _VALID_URL = r'''(?x)
        https?://(?:(?:www|static)\.)?
        (?:
            rtlxl\.nl/(?:[^\#]*\#!|programma)/[^/]+/|
            rtl\.nl/(?:(?:system/videoplayer/(?:[^/]+/)+(?:video_)?embed\.html|embed)\b.+?\buuid=|video/)|
            embed\.rtl\.nl/\#uuid=
        )
        (?P<id>[0-9a-f-]+)'''

    _TESTS = [{
        # new URL schema
        'url': 'https://www.rtlxl.nl/programma/rtl-nieuws/0bd1384d-d970-3086-98bb-5c104e10c26f',
        'md5': '490428f1187b60d714f34e1f2e3af0b6',
        'info_dict': {
            'id': '0bd1384d-d970-3086-98bb-5c104e10c26f',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1593293400,
            'upload_date': '20200627',
            'duration': 661.08,
        },
    }, {
        # old URL schema
        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
        'md5': '473d1946c1fdd050b2c0161a4b13c373',
        'info_dict': {
            'id': '82b1aad1-4a14-3d7b-b554-b0aed1b2c416',
            'ext': 'mp4',
            'title': 'RTL Nieuws',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'timestamp': 1461951000,
            'upload_date': '20160429',
            'duration': 1167.96,
        },
        'skip': '404',
    }, {
        # best format available a3t
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'md5': 'dea7474214af1271d91ef332fb8be7ea',
        'info_dict': {
            'id': '84ae5571-ac25-4225-ae0c-ef8d9efb2aed',
            'ext': 'mp4',
            'timestamp': 1424039400,
            'title': 'RTL Nieuws - Nieuwe beelden Kopenhagen: chaos direct na aanslag',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed$',
            'upload_date': '20150215',
            'description': 'Er zijn nieuwe beelden vrijgegeven die vlak na de aanslag in Kopenhagen zijn gemaakt. Op de video is goed te zien hoe omstanders zich bekommeren om één van de slachtoffers, terwijl de eerste agenten ter plaatse komen.',
        },
    }, {
        # empty synopsis and missing episodes (see https://github.com/ytdl-org/youtube-dl/issues/6275)
        # best format available nettv
        'url': 'http://www.rtl.nl/system/videoplayer/derden/rtlnieuws/video_embed.html#uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a/autoplay=false',
        'info_dict': {
            'id': 'f536aac0-1dc3-4314-920e-3bd1c5b3811a',
            'ext': 'mp4',
            'title': 'RTL Nieuws - Meer beelden van overval juwelier',
            'thumbnail': r're:^https?://screenshots\.rtl\.nl/(?:[^/]+/)*sz=[0-9]+x[0-9]+/uuid=f536aac0-1dc3-4314-920e-3bd1c5b3811a$',
            'timestamp': 1437233400,
            'upload_date': '20150718',
            'duration': 30.474,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # encrypted m3u8 streams, georestricted
        'url': 'http://www.rtlxl.nl/#!/afl-2-257632/52a74543-c504-4cde-8aa8-ec66fe8d68a7',
        'only_matching': True,
    }, {
        'url': 'http://www.rtl.nl/system/videoplayer/derden/embed.html#!/uuid=bb0353b0-d6a4-1dad-90e9-18fe75b8d1f0',
        'only_matching': True,
    }, {
        'url': 'http://rtlxl.nl/?_ga=1.204735956.572365465.1466978370#!/rtl-nieuws-132237/3c487912-023b-49ac-903e-2c5d79f8410f',
        'only_matching': True,
    }, {
        'url': 'https://www.rtl.nl/video/c603c9c2-601d-4b5e-8175-64f1e942dc7d/',
        'only_matching': True,
    }, {
        'url': 'https://static.rtl.nl/embed/?uuid=1a2970fc-5c0b-43ff-9fdc-927e39e6d1bc&autoplay=false&publicatiepunt=rtlnieuwsnl',
        'only_matching': True,
    }, {
        # new embed URL schema
        'url': 'https://embed.rtl.nl/#uuid=84ae5571-ac25-4225-ae0c-ef8d9efb2aed/autoplay=false',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        uuid = self._match_id(url)
        info = self._download_json(
            f'http://www.rtl.nl/system/s4m/vfd/version=2/uuid={uuid}/fmt=adaptive/', uuid)

        material = info['material'][0]
        title = info['abstracts'][0]['name']
        subtitle = material.get('title')
        if subtitle:
            title += f' - {subtitle}'
        description = material.get('synopsis')
        meta = info.get('meta', {})

        videopath = material['videopath']
        m3u8_url = meta.get('videohost', 'http://manifest.us.rtl.nl') + videopath

        formats = self._extract_m3u8_formats(
            m3u8_url, uuid, 'mp4', m3u8_id='hls', fatal=False)

        thumbnails = []

        for p in ('poster_base_url', 'thumb_base_url'):
            if not meta.get(p):
                continue

            thumbnails.append({
                'url': self._proto_relative_url(meta[p] + uuid),
                'width': int_or_none(self._search_regex(
                    r'/sz=([0-9]+)', meta[p], 'thumbnail width', fatal=False)),
                'height': int_or_none(self._search_regex(
                    r'/sz=[0-9]+x([0-9]+)', meta[p], 'thumbnail height', fatal=False)),
            })

        return {
            'id': uuid,
            'title': title,
            'formats': formats,
            'timestamp': material['original_date'],
            'description': description,
            'duration': parse_duration(material.get('duration')),
            'thumbnails': thumbnails,
        }


class RTLLuBaseIE(InfoExtractor):
    _MEDIA_REGEX = {
        'video': r'<rtl-player\s[^>]*\bhls\s*=\s*"([^"]+)',
        'audio': r'<rtl-audioplayer\s[^>]*\bsrc\s*=\s*"([^"]+)',
        'thumbnail': r'<rtl-player\s[^>]*\bposter\s*=\s*"([^"]+)',
    }

    def get_media_url(self, webpage, video_id, media_type):
        return self._search_regex(self._MEDIA_REGEX[media_type], webpage, f'{media_type} url', default=None)

    def get_formats_and_subtitles(self, webpage, video_id):
        video_url, audio_url = self.get_media_url(webpage, video_id, 'video'), self.get_media_url(webpage, video_id, 'audio')

        formats, subtitles = [], {}
        if video_url is not None:
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(video_url, video_id)
        if audio_url is not None:
            formats.append({'url': audio_url, 'ext': 'mp3', 'vcodec': 'none'})

        return formats, subtitles

    def _real_extract(self, url):
        video_id = self._match_id(url)
        is_live = video_id in ('live', 'live-2', 'lauschteren')

        # TODO: extract comments from https://www.rtl.lu/comments?status=1&order=desc&context=news|article|<video_id>
        # the context can be taken from <rtl-comments context=<context>> in the webpage
        webpage = self._download_webpage(url, video_id)

        formats, subtitles = self.get_formats_and_subtitles(webpage, video_id)

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage, default=None),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': self.get_media_url(webpage, video_id, 'thumbnail') or self._og_search_thumbnail(webpage, default=None),
            'is_live': is_live,
        }


class RTLLuTeleVODIE(RTLLuBaseIE):
    IE_NAME = 'rtl.lu:tele-vod'
    _VALID_URL = r'https?://(?:www\.)?rtl\.lu/(tele/(?P<slug>[\w-]+)/v/|video/)(?P<id>\d+)(\.html)?'
    _TESTS = [{
        'url': 'https://www.rtl.lu/tele/de-journal-vun-der-tele/v/3266757.html',
        'info_dict': {
            'id': '3266757',
            'title': 'Informatiounsversammlung Héichwaasser',
            'ext': 'mp4',
            'thumbnail': 'https://replay-assets.rtl.lu/2021/11/16/d3647fc4-470d-11ec-adc2-3a00abd6e90f_00008.jpg',
            'description': 'md5:b1db974408cc858c9fd241812e4a2a14',
        },
    }, {
        'url': 'https://www.rtl.lu/video/3295215',
        'info_dict': {
            'id': '3295215',
            'title': 'Kulturassisen iwwer d\'Bestandsopnam vum Lëtzebuerger Konscht',
            'ext': 'mp4',
            'thumbnail': 'https://replay-assets.rtl.lu/2022/06/28/0000_3295215_0000.jpg',
            'description': 'md5:85bcd4e0490aa6ec969d9bf16927437b',
        },
    }]


class RTLLuArticleIE(RTLLuBaseIE):
    IE_NAME = 'rtl.lu:article'
    _VALID_URL = r'https?://(?:(www|5minutes|today)\.)rtl\.lu/(?:[\w-]+)/(?:[\w-]+)/a/(?P<id>\d+)\.html'
    _TESTS = [{
        # Audio-only
        'url': 'https://www.rtl.lu/sport/news/a/1934360.html',
        'info_dict': {
            'id': '1934360',
            'ext': 'mp3',
            'thumbnail': 'https://static.rtl.lu/rtl2008.lu/nt/p/2022/06/28/19/e4b37d66ddf00bab4c45617b91a5bb9b.jpeg',
            'description': 'md5:5eab4a2a911c1fff7efc1682a38f9ef7',
            'title': 'md5:40aa85f135578fbd549d3c9370321f99',
        },
    }, {
        # 5minutes
        'url': 'https://5minutes.rtl.lu/espace-frontaliers/frontaliers-en-questions/a/1853173.html',
        'info_dict': {
            'id': '1853173',
            'ext': 'mp4',
            'description': 'md5:ac031da0740e997a5cf4633173634fee',
            'title': 'md5:87e17722ed21af0f24be3243f4ec0c46',
            'thumbnail': 'https://replay-assets.rtl.lu/2022/01/26/screenshot_20220126104933_3274749_12b249833469b0d6e4440a1dec83cdfa.jpg',
        },
    }, {
        # today.lu
        'url': 'https://today.rtl.lu/entertainment/news/a/1936203.html',
        'info_dict': {
            'id': '1936203',
            'ext': 'mp4',
            'title': 'Once Upon A Time...zu Lëtzebuerg: The Three Witches\' Tower',
            'description': 'The witchy theme continues in the latest episode of Once Upon A Time...',
            'thumbnail': 'https://replay-assets.rtl.lu/2022/07/02/screenshot_20220702122859_3290019_412dc5185951b7f6545a4039c8be9235.jpg',
        },
    }]


class RTLLuLiveIE(RTLLuBaseIE):
    _VALID_URL = r'https?://www\.rtl\.lu/(?:tele|radio)/(?P<id>live(?:-\d+)?|lauschteren)'
    _TESTS = [{
        # Tele:live
        'url': 'https://www.rtl.lu/tele/live',
        'info_dict': {
            'id': 'live',
            'ext': 'mp4',
            'live_status': 'is_live',
            'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'thumbnail': 'https://static.rtl.lu/livestream/channel1.jpg',
        },
    }, {
        # Tele:live-2
        'url': 'https://www.rtl.lu/tele/live-2',
        'info_dict': {
            'id': 'live-2',
            'ext': 'mp4',
            'live_status': 'is_live',
            'title': r're:RTL - Télé LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'thumbnail': 'https://static.rtl.lu/livestream/channel2.jpg',
        },
    }, {
        # Radio:lauschteren
        'url': 'https://www.rtl.lu/radio/lauschteren',
        'info_dict': {
            'id': 'lauschteren',
            'ext': 'mp4',
            'live_status': 'is_live',
            'title': r're:RTL - Radio LIVE \d{4}-\d{2}-\d{2} \d{2}:\d{2}',
            'thumbnail': 'https://static.rtl.lu/livestream/rtlradiowebtv.jpg',
        },
    }]


class RTLLuRadioIE(RTLLuBaseIE):
    _VALID_URL = r'https?://www\.rtl\.lu/radio/(?:[\w-]+)/s/(?P<id>\d+)(\.html)?'
    _TESTS = [{
        'url': 'https://www.rtl.lu/radio/5-vir-12/s/4033058.html',
        'info_dict': {
            'id': '4033058',
            'ext': 'mp3',
            'description': 'md5:f855a4f3e3235393ae47ed1db5d934b9',
            'title': '5 vir 12 - Stau um Stau',
            'thumbnail': 'https://static.rtl.lu/rtlg//2022/06/24/c9c19e5694a14be46a3647a3760e1f62.jpg',
        },
    }]


# ---- yt-dlp-2024.09.27/yt_dlp/extractor/rtnews.py ----
import re

from .common import InfoExtractor
from ..utils import js_to_json


class RTNewsIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?rt\.com/[^/]+/(?:[^/]+/)?(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.rt.com/sport/546301-djokovic-arrives-belgrade-crowds/',
        'playlist_mincount': 2,
        'info_dict': {
            'id': '546301',
            'title': 'Crowds gather to greet deported Djokovic as he returns to Serbia (VIDEO)',
            'description': 'md5:1d5bfe1a988d81fd74227cfdf93d314d',
            'thumbnail': 'https://cdni.rt.com/files/2022.01/article/61e587a085f540102c3386c1.png',
        },
    }, {
        'url': 'https://www.rt.com/shows/in-question/535980-plot-to-assassinate-julian-assange/',
        'playlist_mincount': 1,
        'info_dict': {
            'id': '535980',
            'title': 'The plot to assassinate Julian Assange',
            'description': 'md5:55279ce5e4441dc1d16e2e4a730152cd',
            'thumbnail': 'https://cdni.rt.com/files/2021.09/article/615226f42030274e8879b53d.png',
        },
        'playlist': [{
            'info_dict': {
                'id': '6152271d85f5400464496162',
                'ext': 'mp4',
                'title': '6152271d85f5400464496162',
            },
        }],
    }]

    def _entries(self, webpage):
        video_urls = set(re.findall(r'https://cdnv\.rt\.com/.*[a-f0-9]+\.mp4', webpage))
        for v_url in video_urls:
            v_id = re.search(r'([a-f0-9]+)\.mp4', v_url).group(1)
            if v_id:
                yield {
                    'id': v_id,
                    'title': v_id,
                    'url': v_url,
                }

    def _real_extract(self, url):
        playlist_id = self._match_id(url)
        webpage = self._download_webpage(url, playlist_id)

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'entries': self._entries(webpage),
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }


class RTDocumentryIE(InfoExtractor):
    _VALID_URL = r'https?://rtd\.rt\.com/(?:(?:series|shows)/[^/]+|films)/(?P<id>[^/?$&#]+)'
    _TESTS = [{
        'url': 'https://rtd.rt.com/films/escobars-hitman/',
        'info_dict': {
            'id': 'escobars-hitman',
            'ext': 'mp4',
            'title': "Escobar's Hitman. Former drug-gang killer, now loved and loathed in Colombia",
            'description': 'md5:647c76984b7cb9a8b52a567e87448d88',
            'thumbnail': 'https://cdni.rt.com/rtd-files/films/escobars-hitman/escobars-hitman_11.jpg',
            'average_rating': 8.53,
            'duration': 3134.0,
        },
        'params': {'skip_download': True},
    }, {
        'url': 'https://rtd.rt.com/shows/the-kalashnikova-show-military-secrets-anna-knishenko/iskander-tactical-system-natos-headache/',
        'info_dict': {
            'id': 'iskander-tactical-system-natos-headache',
            'ext': 'mp4',
            'title': "Iskander tactical system. NATO's headache | The Kalashnikova Show. Episode 10",
Episode 10", 'description': 'md5:da7c24a0aa67bc2bb88c86658508ca87', 'thumbnail': 'md5:89de8ce38c710b7c501ff02d47e2aa89', 'average_rating': 9.27, 'duration': 274.0, 'timestamp': 1605726000, 'view_count': int, 'upload_date': '20201118', }, 'params': {'skip_download': True}, }, { 'url': 'https://rtd.rt.com/series/i-am-hacked-trailer/introduction-to-safe-digital-life-ep2/', 'info_dict': { 'id': 'introduction-to-safe-digital-life-ep2', 'ext': 'mp4', 'title': 'How to Keep your Money away from Hackers | I am Hacked. Episode 2', 'description': 'md5:c46fa9a5af86c0008c45a3940a8cce87', 'thumbnail': 'md5:a5e81b9bf5aed8f5e23d9c053601b825', 'average_rating': 10.0, 'duration': 1524.0, 'timestamp': 1636977600, 'view_count': int, 'upload_date': '20211115', }, 'params': {'skip_download': True}, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) ld_json = self._search_json_ld(webpage, None, fatal=False) if not ld_json: self.raise_no_formats('No video/audio found at the provided url.', expected=True) media_json = self._parse_json( self._search_regex(r'(?s)\'Med\'\s*:\s*\[\s*({.+})\s*\]\s*};', webpage, 'media info'), video_id, transform_source=js_to_json) if 'title' not in ld_json and 'title' in media_json: ld_json['title'] = media_json['title'] formats = [{'url': src['file']} for src in media_json.get('sources') or [] if src.get('file')] return { 'id': video_id, 'thumbnail': media_json.get('image'), 'formats': formats, **ld_json, } class RTDocumentryPlaylistIE(InfoExtractor): _VALID_URL = r'https?://rtd\.rt\.com/(?:series|shows)/(?P<id>[^/]+)/$' _TESTS = [{ 'url': 'https://rtd.rt.com/series/i-am-hacked-trailer/', 'playlist_mincount': 6, 'info_dict': { 'id': 'i-am-hacked-trailer', }, }, { 'url': 'https://rtd.rt.com/shows/the-kalashnikova-show-military-secrets-anna-knishenko/', 'playlist_mincount': 34, 'info_dict': { 'id': 'the-kalashnikova-show-military-secrets-anna-knishenko', }, }] def _entries(self, webpage, playlist_id): video_urls = set(re.findall(r'list-2__link\s*"\s*href="([^"]+)"', webpage)) for v_url in video_urls: if playlist_id not in v_url: continue yield self.url_result( f'https://rtd.rt.com{v_url}', ie=RTDocumentryIE.ie_key()) def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) return { '_type': 'playlist', 'id': playlist_id, 'entries': self._entries(webpage, playlist_id), } class RuptlyIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ruptly\.tv/[a-z]{2}/videos/(?P<id>\d+-\d+)' _TESTS = [{ 'url': 'https://www.ruptly.tv/en/videos/20220112-020-Japan-Double-trouble-Tokyo-zoo-presents-adorable-panda-twins', 'info_dict': { 'id': '20220112-020', 'ext': 'mp4', 'title': 'Japan: Double trouble! 
            'description': 'md5:85a8da5fdb31486f0562daf4360ce75a',
            'thumbnail': 'https://storage.ruptly.tv/thumbnails/20220112-020/i6JQKnTNpYuqaXsR/i6JQKnTNpYuqaXsR.jpg',
        },
        'params': {'skip_download': True},
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        m3u8_url = self._search_regex(
            r'preview_url"\s?:\s?"(https?://storage\.ruptly\.tv/video_projects/.+\.m3u8)"',
            webpage, 'm3u8 url', fatal=False)
        if not m3u8_url:
            self.raise_no_formats('No video/audio found at the provided url.', expected=True)
        formats, subs = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, ext='mp4')

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subs,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }


# ---- yt-dlp-2024.09.27/yt_dlp/extractor/rtp.py ----
import base64
import json
import re
import urllib.parse

from .common import InfoExtractor
from ..utils import js_to_json


class RTPIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/(?:(?:estudoemcasa|palco|zigzag)/)?p(?P<program_id>[0-9]+)/(?P<id>[^/?#]+)'
    _TESTS = [{
        'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
        'md5': 'e736ce0c665e459ddb818546220b4ef8',
        'info_dict': {
            'id': 'e174042',
            'ext': 'mp3',
            'title': 'Paixões Cruzadas',
            'description': 'As paixões musicais de António Cartaxo e António Macedo',
            'thumbnail': r're:^https?://.*\.jpg',
        },
    }, {
        'url': 'https://www.rtp.pt/play/zigzag/p13166/e757904/25-curiosidades-25-de-abril',
        'md5': '9a81ed53f2b2197cfa7ed455b12f8ade',
        'info_dict': {
            'id': 'e757904',
            'ext': 'mp4',
            'title': '25 Curiosidades, 25 de Abril',
            'description': 'Estudar ou não estudar - Em cada um dos episódios descobrimos uma curiosidade acerca de como era viver em Portugal antes da revolução do 25 de abr',
            'thumbnail': r're:^https?://.*\.jpg',
        },
    }, {
        'url': 'http://www.rtp.pt/play/p831/a-quimica-das-coisas',
        'only_matching': True,
    }, {
        'url': 'https://www.rtp.pt/play/estudoemcasa/p7776/portugues-1-ano',
        'only_matching': True,
    }, {
        'url': 'https://www.rtp.pt/play/palco/p13785/l7nnon',
        'only_matching': True,
    }]

    _RX_OBFUSCATION = re.compile(r'''(?xs)
        atob\s*\(\s*decodeURIComponent\s*\(\s*
            (\[[0-9A-Za-z%,'"]*\])
        \s*\.\s*join\(\s*(?:""|'')\s*\)\s*\)\s*\)
    ''')

    def __unobfuscate(self, data, *, video_id):
        if data.startswith('{'):
            data = self._RX_OBFUSCATION.sub(
                lambda m: json.dumps(
                    base64.b64decode(urllib.parse.unquote(
                        ''.join(self._parse_json(m.group(1), video_id)),
                    )).decode('iso-8859-1')),
                data)
        return js_to_json(data)

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_meta(
            'twitter:title', webpage, display_name='title', fatal=True)

        f, config = self._search_regex(
            r'''(?sx)
                (?:var\s+f\s*=\s*(?P<f>".*?"|{[^;]+?});\s*)?
                var\s+player1\s+=\s+new\s+RTPPlayer\s*\((?P<config>{(?:(?!\*/).)+?})\);(?!\s*\*/)
            ''', webpage,
            'player config', group=('f', 'config'))
        config = self._parse_json(
            config, video_id,
            lambda data: self.__unobfuscate(data, video_id=video_id))
        f = config['file'] if not f else self._parse_json(
            f, video_id,
            lambda data: self.__unobfuscate(data, video_id=video_id))

        formats = []
        if isinstance(f, dict):
            f_hls = f.get('hls')
            if f_hls is not None:
                formats.extend(self._extract_m3u8_formats(
                    f_hls, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'))

            f_dash = f.get('dash')
            if f_dash is not None:
                formats.extend(self._extract_mpd_formats(f_dash, video_id, mpd_id='dash'))
        else:
            formats.append({
                'format_id': 'f',
                'url': f,
                'vcodec': 'none' if config.get('mediaType') == 'audio' else None,
            })

        subtitles = {}

        vtt = config.get('vtt')
        if vtt is not None:
            for lcode, lname, url in vtt:
                subtitles.setdefault(lcode, []).append({
                    'name': lname,
                    'url': url,
                })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': self._html_search_meta(['description', 'twitter:description'], webpage),
            'thumbnail': config.get('poster') or self._og_search_thumbnail(webpage),
            'subtitles': subtitles,
        }


# ---- yt-dlp-2024.09.27/yt_dlp/extractor/rtrfm.py ----
from .common import InfoExtractor


class RTRFMIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?rtrfm\.com\.au/(?:shows|show-episode)/(?P<id>[^/?\#&]+)'
    _TESTS = [
        {
            'url': 'https://rtrfm.com.au/shows/breakfast/',
            'md5': '46168394d3a5ce237cf47e85d0745413',
            'info_dict': {
                'id': 'breakfast-2021-11-16',
                'ext': 'mp3',
                'series': 'Breakfast with Taylah',
                'title': r're:^Breakfast with Taylah \d{4}-\d{2}-\d{2}$',
                'description': 'md5:0979c3ab1febfbec3f1ccb743633c611',
            },
            'skip': 'ID and md5 changes daily',
        },
        {
            'url': 'https://rtrfm.com.au/show-episode/breakfast-2021-11-11/',
            'md5': '396bedf1e40f96c62b30d4999202a790',
            'info_dict': {
                'id': 'breakfast-2021-11-11',
                'ext': 'mp3',
                'series': 'Breakfast with Taylah',
                'title': 'Breakfast with Taylah 2021-11-11',
                'description': 'md5:0979c3ab1febfbec3f1ccb743633c611',
            },
        },
        {
            'url': 'https://rtrfm.com.au/show-episode/breakfast-2020-06-01/',
            'md5': '594027f513ec36a24b15d65007a24dff',
            'info_dict': {
                'id': 'breakfast-2020-06-01',
                'ext': 'mp3',
                'series': 'Breakfast with Taylah',
                'title': 'Breakfast with Taylah 2020-06-01',
                'description': r're:^Breakfast with Taylah ',
            },
            'skip': 'This audio has expired',
        },
    ]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        show, date, title = self._search_regex(
            r'''\.playShow(?:From)?\(['"](?P<show>[^'"]+)['"],\s*['"](?P<date>[0-9]{4}-[0-9]{2}-[0-9]{2})['"],\s*['"](?P<title>[^'"]+)['"]''',
            webpage, 'details', group=('show', 'date', 'title'))
        url = self._download_json(
            'https://restreams.rtrfm.com.au/rzz',
            show, 'Downloading MP3 URL', query={'n': show, 'd': date})['u']
        # This is the only indicator of an error until trying to download the URL and
        # downloads of mp4 URLs always fail (403 for current episodes, 404 for missing).
        if '.mp4' in url:
            url = None
            self.raise_no_formats('Expired or no episode on this date', expected=True)
        return {
            'id': f'{show}-{date}',
            'title': f'{title} {date}',
            'series': title,
            'url': url,
            'release_date': date,
            'description': self._og_search_description(webpage),
        }


# ---- yt-dlp-2024.09.27/yt_dlp/extractor/rts.py ----
import re

from .srgssr import SRGSSRIE
from ..utils import (
    determine_ext,
    int_or_none,
    parse_duration,
    parse_iso8601,
    unescapeHTML,
    urljoin,
)


class RTSIE(SRGSSRIE):  # XXX: Do not subclass from concrete IE
    _WORKING = False
    IE_DESC = 'RTS.ch'
    _VALID_URL = r'rts:(?P<rts_id>\d+)|https?://(?:.+?\.)?rts\.ch/(?:[^/]+/){2,}(?P<id>[0-9]+)-(?P<display_id>.+?)\.html'

    _TESTS = [
        {
            'url': 'http://www.rts.ch/archives/tv/divers/3449373-les-enfants-terribles.html',
            'md5': '753b877968ad8afaeddccc374d4256a5',
            'info_dict': {
                'id': '3449373',
                'display_id': 'les-enfants-terribles',
                'ext': 'mp4',
                'duration': 1488,
                'title': 'Les Enfants Terribles',
                'description': 'France Pommier et sa soeur Luce Feral, les deux filles de ce groupe de 5.',
                'uploader': 'Divers',
                'upload_date': '19680921',
                'timestamp': -40280400,
                'thumbnail': r're:^https?://.*\.image',
                'view_count': int,
            },
            'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
        },
        {
            'url': 'http://www.rts.ch/emissions/passe-moi-les-jumelles/5624067-entre-ciel-et-mer.html',
            'info_dict': {
                'id': '5624065',
                'title': 'Passe-moi les jumelles',
            },
            'playlist_mincount': 4,
        },
        {
            'url': 'http://www.rts.ch/video/sport/hockey/5745975-1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski.html',
            'info_dict': {
                'id': '5745975',
                'display_id': '1-2-kloten-fribourg-5-2-second-but-pour-gotteron-par-kwiatowski',
                'ext': 'mp4',
                'duration': 48,
                'title': '1/2, Kloten - Fribourg (5-2): second but pour Gottéron par Kwiatowski',
                'description': 'Hockey - Playoff',
                'uploader': 'Hockey',
                'upload_date': '20140403',
                'timestamp': 1396556882,
                'thumbnail': r're:^https?://.*\.image',
                'view_count': int,
            },
            'params': {
                # m3u8 download
                'skip_download': True,
            },
            'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'],
            'skip': 'Blocked outside Switzerland',
        },
'Blocked outside Switzerland', }, { 'url': 'http://www.rts.ch/video/info/journal-continu/5745356-londres-cachee-par-un-epais-smog.html', 'md5': '9bb06503773c07ce83d3cbd793cebb91', 'info_dict': { 'id': '5745356', 'display_id': 'londres-cachee-par-un-epais-smog', 'ext': 'mp4', 'duration': 33, 'title': 'Londres cachée par un épais smog', 'description': 'Un important voile de smog recouvre Londres depuis mercredi, provoqué par la pollution et du sable du Sahara.', 'uploader': 'L\'actu en vidéo', 'upload_date': '20140403', 'timestamp': 1396537322, 'thumbnail': r're:^https?://.*\.image', 'view_count': int, }, 'expected_warnings': ['Unable to download f4m manifest', 'Failed to download m3u8 information'], }, { 'url': 'http://www.rts.ch/audio/couleur3/programmes/la-belle-video-de-stephane-laurenceau/5706148-urban-hippie-de-damien-krisl-03-04-2014.html', 'md5': 'dd8ef6a22dff163d063e2a52bc8adcae', 'info_dict': { 'id': '5706148', 'display_id': 'urban-hippie-de-damien-krisl-03-04-2014', 'ext': 'mp3', 'duration': 123, 'title': '"Urban Hippie", de Damien Krisl', 'description': 'Des Hippies super glam.', 'upload_date': '20140403', 'timestamp': 1396551600, }, }, { # article with videos on rhs 'url': 'http://www.rts.ch/sport/hockey/6693917-hockey-davos-decroche-son-31e-titre-de-champion-de-suisse.html', 'info_dict': { 'id': '6693917', 'title': 'Hockey: Davos décroche son 31e titre de champion de Suisse', }, 'playlist_mincount': 5, }, { 'url': 'http://pages.rts.ch/emissions/passe-moi-les-jumelles/5624065-entre-ciel-et-mer.html', 'only_matching': True, }, ] def _real_extract(self, url): m = self._match_valid_url(url) media_id = m.group('rts_id') or m.group('id') display_id = m.group('display_id') or media_id def download_json(internal_id): return self._download_json( f'http://www.rts.ch/a/{internal_id}.html?f=json/article', display_id) all_info = download_json(media_id) # media_id extracted out of URL is not always a real id if 'video' not in all_info and 'audio' not in all_info: entries = [] for item in all_info.get('items', []): item_url = item.get('url') if not item_url: continue entries.append(self.url_result(item_url, 'RTS')) if not entries: page, urlh = self._download_webpage_handle(url, display_id) if re.match(self._VALID_URL, urlh.url).group('id') != media_id: return self.url_result(urlh.url, 'RTS') # article with videos on rhs videos = re.findall( r'<article[^>]+class="content-item"[^>]*>\s*<a[^>]+data-video-urn="urn:([^"]+)"', page) if not videos: videos = re.findall( r'(?s)<iframe[^>]+class="srg-player"[^>]+src="[^"]+urn:([^"]+)"', page) if videos: entries = [self.url_result(f'srgssr:{video_urn}', 'SRGSSR') for video_urn in videos] if entries: return self.playlist_result(entries, media_id, all_info.get('title')) internal_id = self._html_search_regex( r'<(?:video|audio) data-id="([0-9]+)"', page, 'internal video id') all_info = download_json(internal_id) media_type = 'video' if 'video' in all_info else 'audio' # check for errors self._get_media_data('rts', media_type, media_id) info = all_info['video']['JSONinfo'] if 'video' in all_info else all_info['audio'] title = info['title'] def extract_bitrate(url): return int_or_none(self._search_regex( r'-([0-9]+)k\.', url, 'bitrate', default=None)) formats = [] streams = info.get('streams', {}) for format_id, format_url in streams.items(): if format_id == 'hds_sd' and 'hds' in streams: continue if format_id == 'hls_sd' and 'hls' in streams: continue ext = determine_ext(format_url) if ext in ('m3u8', 'f4m'): format_url = self._get_tokenized_src(format_url, 
                        media_id, format_id)
                if ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        format_url + ('?' if '?' not in format_url else '&') + 'hdcore=3.4.0',
                        media_id, f4m_id=format_id, fatal=False))
                else:
                    formats.extend(self._extract_m3u8_formats(
                        format_url, media_id, 'mp4', 'm3u8_native',
                        m3u8_id=format_id, fatal=False))
            else:
                formats.append({
                    'format_id': format_id,
                    'url': format_url,
                    'tbr': extract_bitrate(format_url),
                })

        download_base = 'http://rtsww{}-d.rts.ch/'.format('-a' if media_type == 'audio' else '')
        for media in info.get('media', []):
            media_url = media.get('url')
            if not media_url or re.match(r'https?://', media_url):
                continue
            rate = media.get('rate')
            ext = media.get('ext') or determine_ext(media_url, 'mp4')
            format_id = ext
            if rate:
                format_id += '-%dk' % rate
            formats.append({
                'format_id': format_id,
                'url': urljoin(download_base, media_url),
                'tbr': rate or extract_bitrate(media_url),
            })

        self._check_formats(formats, media_id)

        duration = info.get('duration') or info.get('cutout') or info.get('cutduration')
        if isinstance(duration, str):
            duration = parse_duration(duration)

        return {
            'id': media_id,
            'display_id': display_id,
            'formats': formats,
            'title': title,
            'description': info.get('intro'),
            'duration': duration,
            'view_count': int_or_none(info.get('plays')),
            'uploader': info.get('programName'),
            'timestamp': parse_iso8601(info.get('broadcast_date')),
            'thumbnail': unescapeHTML(info.get('preview_image_url')),
        }

yt-dlp-2024.09.27/yt_dlp/extractor/rtvcplay.py

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    clean_html,
    determine_ext,
    float_or_none,
    int_or_none,
    js_to_json,
    mimetype2ext,
    traverse_obj,
    url_or_none,
    urljoin,
)


class RTVCPlayBaseIE(InfoExtractor):
    _BASE_VALID_URL = r'https?://(?:www\.)?rtvcplay\.co'

    def _extract_player_config(self, webpage, video_id):
        return self._search_json(
            r'<script\b[^>]*>[^<]*(?:var|let|const)\s+config\s*=', re.sub(r'"\s*\+\s*"', '', webpage),
            'player_config', video_id, transform_source=js_to_json)

    def _extract_formats_and_subtitles_player_config(self, player_config, video_id):
        formats, subtitles = [], {}
        for source in traverse_obj(player_config, ('sources', ..., lambda _, v: url_or_none(v['url']))):
            ext = mimetype2ext(source.get('mimetype'), default=determine_ext(source['url']))
            if ext == 'm3u8':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    source['url'], video_id, 'mp4', fatal=False)
                formats.extend(fmts)
                self._merge_subtitles(subs, target=subtitles)
            else:
                formats.append({
                    'url': source['url'],
                    'ext': ext,
                })
        return formats, subtitles


class RTVCPlayIE(RTVCPlayBaseIE):
    _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/(?P<category>(?!embed)[^/]+)/(?:[^?#]+/)?(?P<id>[\w-]+)'

    _TESTS = [{
        'url': 'https://www.rtvcplay.co/en-vivo/canal-institucional',
        'info_dict': {
            'id': 'canal-institucional',
            'title': r're:^Canal Institucional',
'description': 'md5:eff9e548394175928059320c006031ea', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'live_status': 'is_live', 'ext': 'mp4', }, 'params': { 'skip_download': 'Livestream', }, }, { 'url': 'https://www.rtvcplay.co/en-vivo/senal-colombia', 'info_dict': { 'id': 'senal-colombia', 'title': r're:^Señal Colombia', 'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'live_status': 'is_live', 'ext': 'mp4', }, 'params': { 'skip_download': 'Livestream', }, }, { 'url': 'https://www.rtvcplay.co/en-vivo/radio-nacional', 'info_dict': { 'id': 'radio-nacional', 'title': r're:^Radio Nacional', 'description': 'md5:5de009bc6a9fa79d2a6cf0b73f977d53', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'live_status': 'is_live', 'ext': 'mp4', }, 'params': { 'skip_download': 'Livestream', }, }, { 'url': 'https://www.rtvcplay.co/peliculas-ficcion/senoritas', 'md5': '1288ee6f6d1330d880f98bff2ed710a3', 'info_dict': { 'id': 'senoritas', 'title': 'Señoritas', 'description': 'md5:f095a2bb52cb6cf279daf6302f86fb32', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'ext': 'mp4', }, }, { 'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa/james-regresa-clases-28022022', 'md5': 'f040a7380a269ad633cf837384d5e9fc', 'info_dict': { 'id': 'james-regresa-clases-28022022', 'title': 'James regresa a clases - 28/02/2022', 'description': 'md5:c5dcdf757c7ab29305e8763c6007e675', 'ext': 'mp4', }, }, { 'url': 'https://www.rtvcplay.co/peliculas-documentales/llinas-el-cerebro-y-el-universo', 'info_dict': { 'id': 'llinas-el-cerebro-y-el-universo', 'title': 'Llinás, el cerebro y el universo', 'description': 'md5:add875bf2309bb52b3e8b9b06116d9b0', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', }, 'playlist_mincount': 3, }, { 'url': 'https://www.rtvcplay.co/competencias-basicas-ciudadanas-y-socioemocionales/profe-en-tu-casa', 'info_dict': { 'id': 'profe-en-tu-casa', 'title': 'Profe en tu casa', 'description': 'md5:47dbe20e263194413b1db2a2805a4f2e', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', }, 'playlist_mincount': 537, }, { 'url': 'https://www.rtvcplay.co/series-al-oido/relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura', 'info_dict': { 'id': 'relato-de-un-naufrago-una-travesia-del-periodismo-a-la-literatura', 'title': 'Relato de un náufrago: una travesía del periodismo a la literatura', 'description': 'md5:6da28fdca4a5a568ea47ef65ef775603', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', }, 'playlist_mincount': 5, }, { 'url': 'https://www.rtvcplay.co/series-al-oido/diez-versiones', 'info_dict': { 'id': 'diez-versiones', 'title': 'Diez versiones', 'description': 'md5:997471ed971cb3fd8e41969457675306', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', }, 'playlist_mincount': 20, }] def _real_extract(self, url): video_id, category = self._match_valid_url(url).group('id', 'category') webpage = self._download_webpage(url, video_id) hydration = self._search_json( r'window\.__RTVCPLAY_STATE__\s*=', webpage, 'hydration', video_id, transform_source=js_to_json)['content']['currentContent'] asset_id = traverse_obj(hydration, ('video', 'assetid')) if asset_id: hls_url = hydration['base_url_hls'].replace('[node:field_asset_id]', asset_id) else: hls_url = traverse_obj(hydration, ('channel', 'hls')) metadata = traverse_obj(hydration, { 'title': 'title', 'description': 'description', 'thumbnail': ((('channel', 'image', 'logo'), ('resource', 'image', 'cover_desktop')), 'path'), }, get_all=False) # Probably it's a program's page if not hls_url: 
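            # (descriptive note: with no direct stream URL, the page is treated
            #  as a program listing; first try a seasonList widget, then fall
            #  back to the flat podcast 'audios' list before erroring out)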
seasons = traverse_obj( hydration, ('widgets', lambda _, y: y['type'] == 'seasonList', 'contents'), get_all=False) if not seasons: podcast_episodes = hydration.get('audios') if not podcast_episodes: raise ExtractorError('Could not find asset_id nor program playlist nor podcast episodes') return self.playlist_result([ self.url_result(episode['file'], url_transparent=True, **traverse_obj(episode, { 'title': 'title', 'description': ('description', {clean_html}), 'episode_number': ('chapter_number', {float_or_none}, {int_or_none}), 'season_number': ('season', {int_or_none}), })) for episode in podcast_episodes], video_id, **metadata) entries = [self.url_result( urljoin(url, episode['slug']), url_transparent=True, **traverse_obj(season, { 'season': 'title', 'season_number': ('season', {int_or_none}), }), **traverse_obj(episode, { 'title': 'title', 'thumbnail': ('image', 'cover', 'path'), 'episode_number': ('chapter_number', {int_or_none}), })) for season in seasons for episode in traverse_obj(season, ('contents', ...))] return self.playlist_result(entries, video_id, **metadata) formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls_url, video_id, 'mp4') return { 'id': video_id, 'formats': formats, 'subtitles': subtitles, 'is_live': category == 'en-vivo', **metadata, } class RTVCPlayEmbedIE(RTVCPlayBaseIE): _VALID_URL = RTVCPlayBaseIE._BASE_VALID_URL + r'/embed/(?P<id>[\w-]+)' _TESTS = [{ 'url': 'https://www.rtvcplay.co/embed/72b0e699-248b-4929-a4a8-3782702fa7f9', 'md5': 'ed529aeaee7aa2a72afe91ac7d1177a8', 'info_dict': { 'id': '72b0e699-248b-4929-a4a8-3782702fa7f9', 'title': 'Tráiler: Señoritas', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'ext': 'mp4', }, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) player_config = self._extract_player_config(webpage, video_id) formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id) asset_id = traverse_obj(player_config, ('rtvcplay', 'assetid')) metadata = {} if not asset_id else self._download_json( f'https://cms.rtvcplay.co/api/v1/video/asset-id/{asset_id}', video_id, fatal=False) return { 'id': video_id, 'formats': formats, 'subtitles': subtitles, **traverse_obj(metadata, { 'title': 'title', 'description': 'description', 'thumbnail': ('image', ..., 'thumbnail', 'path'), }, get_all=False), } class RTVCKalturaIE(RTVCPlayBaseIE): _VALID_URL = r'https?://media\.rtvc\.gov\.co/kalturartvc/(?P<id>[\w-]+)' _TESTS = [{ 'url': 'https://media.rtvc.gov.co/kalturartvc/indexSC.html', 'info_dict': { 'id': 'indexSC', 'title': r're:^Señal Colombia', 'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b', 'thumbnail': r're:^https?://.*\.(?:jpg|png)', 'live_status': 'is_live', 'ext': 'mp4', }, 'params': { 'skip_download': 'Livestream', }, }] def _real_extract(self, url): video_id = self._match_id(url) webpage = self._download_webpage(url, video_id) player_config = self._extract_player_config(webpage, video_id) formats, subtitles = self._extract_formats_and_subtitles_player_config(player_config, video_id) channel_id = traverse_obj(player_config, ('rtvcplay', 'channelId')) metadata = {} if not channel_id else self._download_json( f'https://cms.rtvcplay.co/api/v1/taxonomy_term/streaming/{channel_id}', video_id, fatal=False) fmts, subs = self._extract_m3u8_formats_and_subtitles( traverse_obj(metadata, ('channel', 'hls')), video_id, 'mp4', fatal=False) formats.extend(fmts) self._merge_subtitles(subs, target=subtitles) return { 'id': video_id, 'formats': formats, 
            'subtitles': subtitles,
            'is_live': True,
            **traverse_obj(metadata, {
                'title': 'title',
                'description': 'description',
                'thumbnail': ('channel', 'image', 'logo', 'path'),
            }),
        }

yt-dlp-2024.09.27/yt_dlp/extractor/rtve.py

import base64
import io
import struct

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    determine_ext,
    float_or_none,
    qualities,
    remove_end,
    remove_start,
    try_get,
)


class RTVEALaCartaIE(InfoExtractor):
    IE_NAME = 'rtve.es:alacarta'
    IE_DESC = 'RTVE a la carta'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/(m/)?(alacarta/videos|filmoteca)/[^/]+/[^/]+/(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.rtve.es/alacarta/videos/balonmano/o-swiss-cup-masculina-final-espana-suecia/2491869/',
        'md5': '1d49b7e1ca7a7502c56a4bf1b60f1b43',
        'info_dict': {
            'id': '2491869',
            'ext': 'mp4',
            'title': 'Balonmano - Swiss Cup masculina. Final: España-Suecia',
            'duration': 5024.566,
            'series': 'Balonmano',
        },
        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
    }, {
        'note': 'Live stream',
        'url': 'http://www.rtve.es/alacarta/videos/television/24h-live/1694255/',
        'info_dict': {
            'id': '1694255',
            'ext': 'mp4',
            'title': 're:^24H LIVE [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
            'is_live': True,
        },
        'params': {
            'skip_download': 'live stream',
        },
    }, {
        'url': 'http://www.rtve.es/alacarta/videos/servir-y-proteger/servir-proteger-capitulo-104/4236788/',
        'md5': 'd850f3c8731ea53952ebab489cf81cbf',
        'info_dict': {
            'id': '4236788',
            'ext': 'mp4',
            'title': 'Servir y proteger - Capítulo 104',
            'duration': 3222.0,
        },
        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
    }, {
        'url': 'http://www.rtve.es/m/alacarta/videos/cuentame-como-paso/cuentame-como-paso-t16-ultimo-minuto-nuestra-vida-capitulo-276/2969138/?media=tve',
        'only_matching': True,
    }, {
        'url': 'http://www.rtve.es/filmoteca/no-do/not-1-introduccion-primer-noticiario-espanol/1465256/',
        'only_matching': True,
    }]

    def _real_initialize(self):
        user_agent_b64 = base64.b64encode(self.get_param('http_headers')['User-Agent'].encode()).decode('utf-8')
        self._manager = self._download_json(
            'http://www.rtve.es/odin/loki/' + user_agent_b64,
            None, 'Fetching manager info')['manager']

    @staticmethod
    def _decrypt_url(png):
        # The stream URLs are hidden inside a PNG: each tEXt chunk carries an
        # obfuscated alphabet plus a digit stream that indexes into it.
        encrypted_data = io.BytesIO(base64.b64decode(png)[8:])
        while True:
            length = struct.unpack('!I', encrypted_data.read(4))[0]
            chunk_type = encrypted_data.read(4)
            if chunk_type == b'IEND':
                break
            data = encrypted_data.read(length)
            if chunk_type == b'tEXt':
                alphabet_data, text = data.split(b'\0')
                quality, url_data = text.split(b'%%')
                # Rebuild the alphabet, keeping characters at varying skip intervals
                alphabet = []
                e = 0
                d = 0
                for l in alphabet_data.decode('iso-8859-1'):
                    if d == 0:
                        alphabet.append(l)
                        d = e = (e + 1) % 4
                    else:
                        d -= 1
                # Decode the URL: digit pairs (tens digit, then a units digit
                # separated by filler characters) index into the alphabet
                url = ''
                f = 0
                e = 3
                b = 1
                for letter in url_data.decode('iso-8859-1'):
                    if f == 0:
                        l = int(letter) * 10
                        f = 1
                    else:
                        if e == 0:
                            l += int(letter)
                            url += alphabet[l]
                            e = (b + 3) % 4
                            f = 0
                            b += 1
                        else:
                            e -= 1
                yield quality.decode(), url
            encrypted_data.read(4)  # CRC

    def _extract_png_formats(self, video_id):
        png = self._download_webpage(
            f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/videos/{video_id}.png',
            video_id, 'Downloading url information', query={'q': 'v2'})
        q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
        formats = []
        for quality, video_url in self._decrypt_url(png):
            ext = determine_ext(video_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    video_url, video_id, 'mp4',
                    'm3u8_native', m3u8_id='hls', fatal=False))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    video_url, video_id, 'dash', fatal=False))
            else:
                formats.append({
                    'format_id': quality,
                    'quality': q(quality),
                    'url': video_url,
                })
        return formats

    def _real_extract(self, url):
        video_id = self._match_id(url)
        info = self._download_json(
            f'http://www.rtve.es/api/videos/{video_id}/config/alacarta_videos.json',
            video_id)['page']['items'][0]
        if info['state'] == 'DESPU':
            raise ExtractorError('The video is no longer available', expected=True)
        title = info['title'].strip()
        formats = self._extract_png_formats(video_id)

        subtitles = None
        sbt_file = info.get('sbtFile')
        if sbt_file:
            subtitles = self.extract_subtitles(video_id, sbt_file)

        is_live = info.get('live') is True

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': info.get('image'),
            'subtitles': subtitles,
            'duration': float_or_none(info.get('duration'), 1000),
            'is_live': is_live,
            'series': info.get('programTitle'),
        }

    def _get_subtitles(self, video_id, sub_file):
        subs = self._download_json(
            sub_file + '.json', video_id,
            'Downloading subtitles info')['page']['items']
        return dict(
            (s['lang'], [{'ext': 'vtt', 'url': s['src']}])
            for s in subs)


class RTVEAudioIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
    IE_NAME = 'rtve.es:audio'
    IE_DESC = 'RTVE audio'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/(alacarta|play)/audios/[^/]+/[^/]+/(?P<id>[0-9]+)'

    _TESTS = [{
        'url': 'https://www.rtve.es/alacarta/audios/a-hombros-de-gigantes/palabra-ingeniero-codigos-informaticos-27-04-21/5889192/',
        'md5': 'ae06d27bff945c4e87a50f89f6ce48ce',
        'info_dict': {
            'id': '5889192',
            'ext': 'mp3',
            'title': 'Códigos informáticos',
            'thumbnail': r're:https?://.+/1598856591583.jpg',
            'duration': 349.440,
            'series': 'A hombros de gigantes',
        },
    }, {
        'url': 'https://www.rtve.es/play/audios/en-radio-3/ignatius-farray/5791165/',
        'md5': '072855ab89a9450e0ba314c717fa5ebc',
        'info_dict': {
            'id': '5791165',
            'ext': 'mp3',
            'title': 'Ignatius Farray',
            'thumbnail': r're:https?://.+/1613243011863.jpg',
            'duration': 3559.559,
            'series': 'En Radio 3',
        },
    }, {
        'url': 'https://www.rtve.es/play/audios/frankenstein-o-el-moderno-prometeo/capitulo-26-ultimo-muerte-victor-juan-jose-plans-mary-shelley/6082623/',
        'md5': '0eadab248cc8dd193fa5765712e84d5c',
        'info_dict': {
            'id': '6082623',
            'ext': 'mp3',
            'title': 'Capítulo 26 y último: La muerte de Victor',
            'thumbnail': r're:https?://.+/1632147445707.jpg',
            'duration': 3174.086,
            'series': 'Frankenstein o el moderno Prometeo',
        },
    }]

    def _extract_png_formats(self, audio_id):
        """
        Retrieve the media-related PNG thumbnail, which obfuscates valuable
        information about the media. That information is decrypted via the
        base class's _decrypt_url, yielding the media quality and media URL.
        """
        png = self._download_webpage(
            f'http://www.rtve.es/ztnr/movil/thumbnail/{self._manager}/audios/{audio_id}.png',
            audio_id, 'Downloading url information', query={'q': 'v2'})
        q = qualities(['Media', 'Alta', 'HQ', 'HD_READY', 'HD_FULL'])
        formats = []
        for quality, audio_url in self._decrypt_url(png):
            ext = determine_ext(audio_url)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    audio_url, audio_id, 'mp4',
                    'm3u8_native', m3u8_id='hls', fatal=False))
            elif ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    audio_url, audio_id, 'dash', fatal=False))
            else:
                formats.append({
                    'format_id': quality,
                    'quality': q(quality),
                    'url': audio_url,
                })
        return formats

    def _real_extract(self, url):
        audio_id = self._match_id(url)
        info = self._download_json(
            f'https://www.rtve.es/api/audios/{audio_id}.json',
            audio_id)['page']['items'][0]

        return {
            'id': audio_id,
            'title': info['title'].strip(),
            'thumbnail': info.get('thumbnail'),
            'duration': float_or_none(info.get('duration'), 1000),
            'series': try_get(info, lambda x: x['programInfo']['title']),
            'formats': self._extract_png_formats(audio_id),
        }


class RTVEInfantilIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
    IE_NAME = 'rtve.es:infantil'
    IE_DESC = 'RTVE infantil'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/infantil/serie/[^/]+/video/[^/]+/(?P<id>[0-9]+)/'

    _TESTS = [{
        'url': 'http://www.rtve.es/infantil/serie/cleo/video/maneras-vivir/3040283/',
        'md5': '5747454717aedf9f9fdf212d1bcfc48d',
        'info_dict': {
            'id': '3040283',
            'ext': 'mp4',
            'title': 'Maneras de vivir',
            'thumbnail': r're:https?://.+/1426182947956\.JPG',
            'duration': 357.958,
        },
        'expected_warnings': ['Failed to download MPD manifest', 'Failed to download m3u8 information'],
    }]


class RTVELiveIE(RTVEALaCartaIE):  # XXX: Do not subclass from concrete IE
    IE_NAME = 'rtve.es:live'
    IE_DESC = 'RTVE.es live streams'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/directo/(?P<id>[a-zA-Z0-9-]+)'

    _TESTS = [{
        'url': 'http://www.rtve.es/directo/la-1/',
        'info_dict': {
            'id': 'la-1',
            'ext': 'mp4',
            'title': 're:^La 1 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
        },
        'params': {
            'skip_download': 'live stream',
        },
    }]

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = remove_end(self._og_search_title(webpage), ' en directo en RTVE.es')
        title = remove_start(title, 'Estoy viendo ')

        vidplayer_id = self._search_regex(
            (r'playerId=player([0-9]+)',
             r'class=["\'].*?\blive_mod\b.*?["\'][^>]+data-assetid=["\'](\d+)',
             r'data-id=["\'](\d+)'),
            webpage, 'internal video ID')

        return {
            'id': video_id,
            'title': title,
            'formats': self._extract_png_formats(vidplayer_id),
            'is_live': True,
        }


class RTVETelevisionIE(InfoExtractor):
    IE_NAME = 'rtve.es:television'
    _VALID_URL = r'https?://(?:www\.)?rtve\.es/television/[^/]+/[^/]+/(?P<id>\d+).shtml'

    _TEST = {
        'url': 'http://www.rtve.es/television/20160628/revolucion-del-movil/1364141.shtml',
        'info_dict': {
            'id': '3069778',
            'ext': 'mp4',
            'title': 'Documentos TV - La revolución del móvil',
            'duration': 3496.948,
        },
        'params': {
            'skip_download': True,
        },
    }

    def _real_extract(self, url):
        page_id = self._match_id(url)
        webpage = self._download_webpage(url, page_id)

        alacarta_url = self._search_regex(
            r'data-location="alacarta_videos"[^<]+url":"(http://www\.rtve\.es/alacarta.+?)&',
            webpage, 'alacarta url', default=None)
        if alacarta_url is None:
            raise ExtractorError(
                'The webpage doesn\'t contain any video', expected=True)

        return self.url_result(alacarta_url, ie=RTVEALaCartaIE.ie_key())

yt-dlp-2024.09.27/yt_dlp/extractor/rtvs.py

import re

from .common import InfoExtractor
from ..utils import (
    parse_duration,
    traverse_obj,
    unified_timestamp,
)


class RTVSIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?rtvs\.sk/(?:radio|televizia)/archiv(?:/\d+)?/(?P<id>\d+)/?(?:[#?]|$)'

    _TESTS = [{
        # radio archive
        'url': 'http://www.rtvs.sk/radio/archiv/11224/414872',
        'md5': '134d5d6debdeddf8a5d761cbc9edacb8',
        'info_dict': {
            'id': '414872',
            'ext': 'mp3',
            'title': 'Ostrov pokladov 1 časť.mp3',
            'duration': 2854,
            'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0000/b1R8.rtvs.jpg',
            'display_id': '135331',
        },
    }, {
        # tv archive
        'url': 'http://www.rtvs.sk/televizia/archiv/8249/63118',
        'info_dict': {
            'id': '63118',
            'ext': 'mp4',
            'title': 'Amaro Džives - Náš deň',
            'description': 'Galavečer pri príležitosti Medzinárodného dňa Rómov.',
            'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0031/L7Qm.amaro_dzives_png.jpg',
            'timestamp': 1428555900,
            'upload_date': '20150409',
            'duration': 4986,
        },
    }, {
        # tv archive
        'url': 'https://www.rtvs.sk/televizia/archiv/18083?utm_source=web&utm_medium=rozcestnik&utm_campaign=Robin',
        'info_dict': {
            'id': '18083',
            'ext': 'mp4',
            'title': 'Robin',
            'description': 'md5:2f70505a7b8364491003d65ff7a0940a',
            'timestamp': 1636652760,
            'display_id': '307655',
            'duration': 831,
            'upload_date': '20211111',
            'thumbnail': 'https://www.rtvs.sk/media/a501/image/file/2/0916/robin.jpg',
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        iframe_id = self._search_regex(
            r'<iframe[^>]+id\s*=\s*"player_[^_]+_([0-9]+)"', webpage, 'Iframe ID')
        iframe_url = self._search_regex(
            fr'<iframe[^>]+id\s*=\s*"player_[^_]+_{re.escape(iframe_id)}"[^>]+src\s*=\s*"([^"]+)"', webpage, 'Iframe URL')

        webpage = self._download_webpage(iframe_url, video_id, 'Downloading iframe')
        json_url = self._search_regex(r'var\s+url\s*=\s*"([^"]+)"\s*\+\s*ruurl', webpage, 'json URL')
        data = self._download_json(f'https:{json_url}b=mozilla&p=win&v=97&f=0&d=1', video_id)

        if data.get('clip'):
            data['playlist'] = [data['clip']]

        if traverse_obj(data, ('playlist', 0, 'sources', 0, 'type')) == 'audio/mp3':
            formats = [{'url': traverse_obj(data, ('playlist', 0, 'sources', 0, 'src'))}]
        else:
            formats = self._extract_m3u8_formats(traverse_obj(data, ('playlist', 0, 'sources', 0, 'src')), video_id)

        return {
            'id': video_id,
            'display_id': iframe_id,
            'title': traverse_obj(data, ('playlist', 0, 'title')),
            'description': traverse_obj(data, ('playlist', 0, 'description')),
            'duration': parse_duration(traverse_obj(data, ('playlist', 0, 'length'))),
            'thumbnail': traverse_obj(data, ('playlist', 0, 'image')),
            'timestamp': unified_timestamp(traverse_obj(data, ('playlist', 0, 'datetime_create'))),
            'formats': formats,
        }

yt-dlp-2024.09.27/yt_dlp/extractor/rtvslo.py

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_duration,
    traverse_obj,
    unified_timestamp,
    url_or_none,
    urljoin,
)


class RTVSLOIE(InfoExtractor):
    IE_NAME = 'rtvslo.si'
    _VALID_URL = r'''(?x)
        https?://(?:
            (?:365|4d)\.rtvslo.si/arhiv/[^/?#&;]+|
            (?:www\.)?rtvslo\.si/rtv365/arhiv
        )/(?P<id>\d+)'''
    _GEO_COUNTRIES = ['SI']

    _API_BASE = 'https://api.rtvslo.si/ava/{}/{}?client_id=82013fb3a531d5414f478747c1aca622'
    SUB_LANGS_MAP = {'Slovenski': 'sl'}

    _TESTS = [{
        'url': 'https://www.rtvslo.si/rtv365/arhiv/174842550?s=tv',
        'info_dict': {
            'id': '174842550',
            'ext': 'mp4',
            'release_timestamp': 1643140032,
            'upload_date': '20220125',
            'series': 'Dnevnik',
            'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/92/dnevnik_3_wide2.jpg',
            'description': 'md5:76a18692757aeb8f0f51221106277dd2',
            'timestamp': 1643137046,
            'title': 'Dnevnik',
            'series_id': '92',
            'release_date': '20220125',
            'duration': 1789,
        },
    }, {
        'url': 'https://365.rtvslo.si/arhiv/utrip/174843754',
        'info_dict': {
            'id': '174843754',
            'ext': 'mp4',
            'series_id': '94',
            'release_date': '20220129',
            'timestamp': 1643484455,
            'title': 'Utrip',
            'duration': 813,
            'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/94/utrip_1_wide2.jpg',
            'description': 'md5:77f2892630c7b17bb7a5bb84319020c9',
            'release_timestamp': 1643485825,
            'upload_date': '20220129',
            'series': 'Utrip',
        },
    }, {
        'url': 'https://365.rtvslo.si/arhiv/il-giornale-della-sera/174844609',
        'info_dict': {
            'id': '174844609',
            'ext': 'mp3',
            'series_id': '106615841',
            'title': 'Il giornale della sera',
            'duration': 1328,
            'series': 'Il giornale della sera',
            'timestamp': 1643743800,
            'release_timestamp': 1643745424,
            'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/il-giornale-della-sera_wide2.jpg',
            'upload_date': '20220201',
            'tbr': 128000,
            'release_date': '20220201',
        },
    }, {
        'url': 'https://365.rtvslo.si/arhiv/razred-zase/148350750',
        'info_dict': {
            'id': '148350750',
            'ext': 'mp4',
            'title': 'Prvi šolski dan, mozaična oddaja za mlade',
            'series': 'Razred zase',
            'series_id': '148185730',
            'duration': 1481,
            'upload_date': '20121019',
            'timestamp': 1350672122,
            'release_date': '20121019',
            'release_timestamp': 1350672122,
            'thumbnail': 'https://img.rtvcdn.si/_up/ava/ava_misc/show_logos/148185730/razred_zase_2014_logo_4d_wide2.jpg',
        },
    }, {
        'url': 'https://4d.rtvslo.si/arhiv/dnevnik/174842550',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        v_id = self._match_id(url)
        meta = self._download_json(self._API_BASE.format('getRecordingDrm', v_id), v_id)['response']

        thumbs = [{'id': k, 'url': v,
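                   # (note: the Accept header below pins thumbnail requests to
                   #  plain JPEG; presumably the image endpoint content-negotiates
                   #  formats otherwise. This is an inference from the code, not
                   #  documented upstream.)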
'http_headers': {'Accept': 'image/jpeg'}} for k, v in (meta.get('images') or {}).items()] subs = {} for s in traverse_obj(meta, 'subs', 'subtitles', default=[]): lang = self.SUB_LANGS_MAP.get(s.get('language'), s.get('language') or 'und') subs.setdefault(lang, []).append({ 'url': s.get('file'), 'ext': traverse_obj(s, 'format', expected_type=str.lower), }) jwt = meta.get('jwt') if not jwt: raise ExtractorError('Site did not provide an authentication token, cannot proceed.') media = self._download_json(self._API_BASE.format('getMedia', v_id), v_id, query={'jwt': jwt})['response'] formats = [] skip_protocols = ['smil', 'f4m', 'dash'] adaptive_url = traverse_obj(media, ('addaptiveMedia', 'hls_sec'), expected_type=url_or_none) if adaptive_url: formats = self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols) adaptive_url = traverse_obj(media, ('addaptiveMedia_sl', 'hls_sec'), expected_type=url_or_none) if adaptive_url: for f in self._extract_wowza_formats(adaptive_url, v_id, skip_protocols=skip_protocols): formats.append({ **f, 'format_id': 'sign-' + f['format_id'], 'format_note': 'Sign language interpretation', 'preference': -10, 'language': ( 'slv' if f.get('language') == 'eng' and f.get('acodec') != 'none' else f.get('language')), }) for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['https']))): formats.append(traverse_obj(mediafile, { 'url': ('streams', 'https'), 'ext': ('mediaType', {str.lower}), 'width': ('width', {int_or_none}), 'height': ('height', {int_or_none}), 'tbr': ('bitrate', {int_or_none}), 'filesize': ('filesize', {int_or_none}), })) for mediafile in traverse_obj(media, ('mediaFiles', lambda _, v: url_or_none(v['streams']['hls_sec']))): formats.extend(self._extract_wowza_formats( mediafile['streams']['hls_sec'], v_id, skip_protocols=skip_protocols)) if any('intermission.mp4' in x['url'] for x in formats): self.raise_geo_restricted(countries=self._GEO_COUNTRIES, metadata_available=True) if any('dummy_720p.mp4' in x.get('manifest_url', '') for x in formats) and meta.get('stub') == 'error': raise ExtractorError(f'{self.IE_NAME} said: Clip not available', expected=True) return { 'id': v_id, 'webpage_url': ''.join(traverse_obj(meta, ('canonical', ('domain', 'path')))), 'title': meta.get('title'), 'formats': formats, 'subtitles': subs, 'thumbnails': thumbs, 'description': meta.get('description'), 'timestamp': unified_timestamp(traverse_obj(meta, 'broadcastDate', ('broadcastDates', 0))), 'release_timestamp': unified_timestamp(meta.get('recordingDate')), 'duration': meta.get('duration') or parse_duration(meta.get('length')), 'tags': meta.get('genre'), 'series': meta.get('showName'), 'series_id': meta.get('showId'), } class RTVSLOShowIE(InfoExtractor): IE_NAME = 'rtvslo.si:show' _VALID_URL = r'https?://(?:365|4d)\.rtvslo.si/oddaja/[^/?#&]+/(?P<id>\d+)' _TESTS = [{ 'url': 'https://365.rtvslo.si/oddaja/ekipa-bled/173250997', 'info_dict': { 'id': '173250997', 'title': 'Ekipa Bled', }, 'playlist_count': 18, }] def _real_extract(self, url): playlist_id = self._match_id(url) webpage = self._download_webpage(url, playlist_id) return self.playlist_from_matches( re.findall(r'<a [^>]*\bhref="(/arhiv/[^"]+)"', webpage), playlist_id, self._html_extract_title(webpage), getter=lambda x: urljoin('https://365.rtvslo.si', x), ie=RTVSLOIE) 
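
# A minimal doctest-style sketch of how RTVSLOShowIE above collects episode
# links, assuming show pages keep the href="/arhiv/..." anchor shape
# (illustration only, not part of the extractor):
#
#     >>> import re
#     >>> page = '<a class="card" href="/arhiv/ekipa-bled/173250997">Ep 1</a>'
#     >>> re.findall(r'<a [^>]*\bhref="(/arhiv/[^"]+)"', page)
#     ['/arhiv/ekipa-bled/173250997']
#
# Each relative match is then joined onto https://365.rtvslo.si and delegated
# to RTVSLOIE via playlist_from_matches.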
yt-dlp-2024.09.27/yt_dlp/extractor/rudovideo.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    determine_ext,
    js_to_json,
    traverse_obj,
    update_url_query,
    url_or_none,
)


class RudoVideoIE(InfoExtractor):
    _VALID_URL = r'https?://rudo\.video/(?P<type>vod|podcast|live)/(?P<id>[^/?&#]+)'
    _EMBED_REGEX = [r'<iframe[^>]+src=[\'"](?P<url>(?:https?:)//rudo\.video/(?:vod|podcast|live)/[^\'"]+)']
    _TESTS = [{
        'url': 'https://rudo.video/podcast/cz2wrUy8l0o',
        'md5': '28ed82b477708dc5e12e072da2449221',
        'info_dict': {
            'id': 'cz2wrUy8l0o',
            'title': 'Diego Cabot',
            'ext': 'mp4',
            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
        },
    }, {
        'url': 'https://rudo.video/podcast/bQkt07',
        'md5': '36b22a9863de0f47f00fc7532a32a898',
        'info_dict': {
            'id': 'bQkt07',
            'title': 'Tubular Bells',
            'ext': 'mp4',
            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
        },
    }, {
        'url': 'https://rudo.video/podcast/b42ZUznHX0',
        'md5': 'b91c70d832938871367f8ad10c895821',
        'info_dict': {
            'id': 'b42ZUznHX0',
            'title': 'Columna Ruperto Concha',
            'ext': 'mp3',
            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
        },
    }, {
        'url': 'https://rudo.video/vod/bN5AaJ',
        'md5': '01324a329227e2591530ecb4f555c881',
        'info_dict': {
            'id': 'bN5AaJ',
            'title': 'Ucrania 19.03',
            'creator': 'La Tercera',
            'ext': 'mp4',
            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
        },
    }, {
        'url': 'https://rudo.video/live/bbtv',
        'info_dict': {
            'id': 'bbtv',
            'ext': 'mp4',
            'creator': 'BioBioTV',
            'live_status': 'is_live',
            'title': r're:^LIVE BBTV\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}$',
            'thumbnail': r're:^(?:https?:)?//.*\.(png|jpg)$',
        },
    }, {
        'url': 'https://rudo.video/live/c13',
        'info_dict': {
            'id': 'c13',
            'title': 'CANAL13',
            'ext': 'mp4',
        },
        'skip': 'Geo-restricted to Chile',
    }, {
        'url': 'https://rudo.video/live/t13-13cl',
        'info_dict': {
            'id': 't13-13cl',
            'title': 'T13',
            'ext': 'mp4',
        },
        'skip': 'Geo-restricted to Chile',
    }]

    def _real_extract(self, url):
        video_id, type_ = self._match_valid_url(url).group('id', 'type')
        is_live = type_ == 'live'
        webpage = self._download_webpage(url, video_id)
        if 'Streaming is not available in your area' in webpage:
            self.raise_geo_restricted()

        media_url = (
            self._search_regex(
                r'var\s+streamURL\s*=\s*[\'"]([^?\'"]+)', webpage, 'stream url', default=None)
            # Source URL must be used only if streamURL is unavailable
            or self._search_regex(
                r'<source[^>]+src=[\'"]([^\'"]+)', webpage, 'source url', default=None))
        if not media_url:
            youtube_url = self._search_regex(
                r'file:\s*[\'"]((?:https?:)//(?:www\.)?youtube\.com[^\'"]+)',
                webpage, 'youtube url', default=None)
            if youtube_url:
                return self.url_result(youtube_url, 'Youtube')
            raise ExtractorError('Unable to extract stream url')

        token_array = self._search_json(
            r'<script>var\s+_\$_[a-zA-Z0-9]+\s*=', webpage, 'access token array', video_id,
            contains_pattern=r'\[(?s:.+)\]', default=None, transform_source=js_to_json)
        if token_array:
            token_url = traverse_obj(token_array, (..., {url_or_none}), get_all=False)
            if not token_url:
                raise ExtractorError('Invalid access token array')
            access_token = self._download_json(
                token_url, video_id,
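                # (note: token_url was taken from the obfuscated _$_ script array
                #  parsed above; the JSON response carries the token under
                #  data.authToken, which is appended to media_url below as the
                #  auth-token query parameter)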
                note='Downloading access token')['data']['authToken']
            media_url = update_url_query(media_url, {'auth-token': access_token})

        ext = determine_ext(media_url)
        if ext == 'm3u8':
            formats = self._extract_m3u8_formats(media_url, video_id, live=is_live)
        elif ext == 'mp3':
            formats = [{
                'url': media_url,
                'vcodec': 'none',
            }]
        else:
            formats = [{'url': media_url}]

        return {
            'id': video_id,
            'title': (self._search_regex(r'var\s+titleVideo\s*=\s*[\'"]([^\'"]+)',
                                         webpage, 'title', default=None)
                      or self._og_search_title(webpage)),
            'creator': self._search_regex(r'var\s+videoAuthor\s*=\s*[\'"]([^?\'"]+)',
                                          webpage, 'videoAuthor', default=None),
            'thumbnail': (self._search_regex(r'var\s+posterIMG\s*=\s*[\'"]([^?\'"]+)',
                                             webpage, 'thumbnail', default=None)
                          or self._og_search_thumbnail(webpage)),
            'formats': formats,
            'is_live': is_live,
        }

yt-dlp-2024.09.27/yt_dlp/extractor/rule34video.py

import re

from .common import InfoExtractor
from ..utils import (
    clean_html,
    extract_attributes,
    get_element_by_attribute,
    get_element_by_class,
    get_element_html_by_class,
    get_elements_by_class,
    int_or_none,
    parse_count,
    parse_duration,
    unescapeHTML,
)
from ..utils.traversal import traverse_obj


class Rule34VideoIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?rule34video\.com/videos?/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://rule34video.com/video/3065157/shot-it-mmd-hmv/',
            'md5': 'ffccac2c23799dabbd192621ae4d04f3',
            'info_dict': {
                'id': '3065157',
                'ext': 'mp4',
                'title': 'Shot It-(mmd hmv)',
                'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065157/preview.jpg',
                'duration': 347.0,
                'age_limit': 18,
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'timestamp': 1639872000,
                'description': 'https://discord.gg/aBqPrHSHvv',
                'upload_date': '20211219',
                'uploader': 'Sweet HMV',
                'uploader_url': 'https://rule34video.com/members/22119/',
                'categories': ['3D', 'MMD', 'iwara'],
                'tags': 'mincount:10',
            },
        },
        {
            'url': 'https://rule34video.com/videos/3065296/lara-in-trouble-ep-7-wildeerstudio/',
            'md5': '6bb5169f9f6b38cd70882bf2e64f6b86',
            'info_dict': {
                'id': '3065296',
                'ext': 'mp4',
                'title': 'Lara in Trouble Ep. 7 [WildeerStudio]',
                'thumbnail': 'https://rule34video.com/contents/videos_screenshots/3065000/3065296/preview.jpg',
                'duration': 938.0,
                'age_limit': 18,
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'timestamp': 1640131200,
                'description': '',
                'creators': ['WildeerStudio'],
                'upload_date': '20211222',
                'uploader': 'CerZule',
                'uploader_url': 'https://rule34video.com/members/36281/',
                'categories': ['3D', 'Tomb Raider'],
                'tags': 'mincount:40',
            },
        },
    ]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        formats = []

        for mobj in re.finditer(r'<a[^>]+href="(?P<video_url>[^"]+download=true[^"]+)".*>(?P<ext>[^\s]+) (?P<quality>[^<]+)p</a>', webpage):
            url, ext, quality = mobj.groups()
            formats.append({
                'url': url,
                'ext': ext.lower(),
                'quality': quality,
            })

        categories, creators, uploader, uploader_url = [None] * 4
        for col in get_elements_by_class('col', webpage):
            label = clean_html(get_element_by_class('label', col))
            if label == 'Categories:':
                categories = list(map(clean_html, get_elements_by_class('item', col)))
            elif label == 'Artist:':
                creators = list(map(clean_html, get_elements_by_class('item', col)))
            elif label == 'Uploaded By:':
                uploader = clean_html(get_element_by_class('name', col))
                uploader_url = extract_attributes(get_element_html_by_class('name', col) or '').get('href')

        return {
            **traverse_obj(self._search_json_ld(webpage, video_id, default={}), ({
                'title': 'title',
                'view_count': 'view_count',
                'like_count': 'like_count',
                'duration': 'duration',
                'timestamp': 'timestamp',
                'description': 'description',
                'thumbnail': ('thumbnails', 0, 'url'),
            })),
            'id': video_id,
            'formats': formats,
            'title': self._html_extract_title(webpage),
            'thumbnail': self._html_search_regex(
                r'preview_url:\s+\'([^\']+)\'', webpage, 'thumbnail', default=None),
            'duration': parse_duration(self._html_search_regex(
                r'"icon-clock"></i>\s+<span>((?:\d+:?)+)', webpage, 'duration', default=None)),
            'view_count': int_or_none(self._html_search_regex(
                r'"icon-eye"></i>\s+<span>([ \d]+)', webpage, 'views', default='').replace(' ', '')),
            'like_count': parse_count(get_element_by_class('voters count', webpage)),
            'comment_count': int_or_none(self._search_regex(
                r'[^(]+\((\d+)\)', get_element_by_attribute('href', '#tab_comments', webpage), 'comment count', fatal=False)),
            'age_limit': 18,
            'creators': creators,
            'uploader': uploader,
            'uploader_url': uploader_url,
            'categories': categories,
            'tags': list(map(unescapeHTML, re.findall(
                r'<a class="tag_item"[^>]+\bhref="https://rule34video\.com/tags/\d+/"[^>]*>(?P<tag>[^>]*)</a>', webpage))),
        }

yt-dlp-2024.09.27/yt_dlp/extractor/rumble.py

import itertools
import
re from .common import InfoExtractor from ..networking.exceptions import HTTPError from ..utils import ( ExtractorError, UnsupportedError, clean_html, determine_ext, extract_attributes, format_field, get_element_by_class, get_elements_html_by_class, int_or_none, join_nonempty, parse_count, parse_iso8601, traverse_obj, unescapeHTML, urljoin, ) class RumbleEmbedIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rumble\.com/embed/(?:[0-9a-z]+\.)?(?P<id>[0-9a-z]+)' _EMBED_REGEX = [fr'(?:<(?:script|iframe)[^>]+\bsrc=|["\']embedUrl["\']\s*:\s*)["\'](?P<url>{_VALID_URL})'] _TESTS = [{ 'url': 'https://rumble.com/embed/v5pv5f', 'md5': '36a18a049856720189f30977ccbb2c34', 'info_dict': { 'id': 'v5pv5f', 'ext': 'mp4', 'title': 'WMAR 2 News Latest Headlines | October 20, 6pm', 'timestamp': 1571611968, 'upload_date': '20191020', 'channel_url': 'https://rumble.com/c/WMAR', 'channel': 'WMAR', 'thumbnail': 'https://sp.rmbl.ws/s8/1/5/M/z/1/5Mz1a.qR4e-small-WMAR-2-News-Latest-Headline.jpg', 'duration': 234, 'uploader': 'WMAR', 'live_status': 'not_live', }, }, { 'url': 'https://rumble.com/embed/vslb7v', 'md5': '7418035de1a30a178b8af34dc2b6a52b', 'info_dict': { 'id': 'vslb7v', 'ext': 'mp4', 'title': 'Defense Sec. says US Commitment to NATO Defense \'Ironclad\'', 'timestamp': 1645142135, 'upload_date': '20220217', 'channel_url': 'https://rumble.com/c/CyberTechNews', 'channel': 'CTNews', 'thumbnail': 'https://sp.rmbl.ws/s8/6/7/i/9/h/7i9hd.OvCc.jpg', 'duration': 901, 'uploader': 'CTNews', 'live_status': 'not_live', }, }, { 'url': 'https://rumble.com/embed/vunh1h', 'info_dict': { 'id': 'vunh1h', 'ext': 'mp4', 'title': '‘Gideon, op zoek naar de waarheid’ including ENG SUBS', 'timestamp': 1647197663, 'upload_date': '20220313', 'channel_url': 'https://rumble.com/user/BLCKBX', 'channel': 'BLCKBX', 'thumbnail': r're:https://.+\.jpg', 'duration': 5069, 'uploader': 'BLCKBX', 'live_status': 'not_live', 'subtitles': { 'en': [ { 'url': r're:https://.+\.vtt', 'name': 'English', 'ext': 'vtt', }, ], }, }, 'params': {'skip_download': True}, }, { 'url': 'https://rumble.com/embed/v1essrt', 'info_dict': { 'id': 'v1essrt', 'ext': 'mp4', 'title': 'startswith:lofi hip hop radio 📚 - beats to relax/study to', 'timestamp': 1661519399, 'upload_date': '20220826', 'channel_url': 'https://rumble.com/c/LofiGirl', 'channel': 'Lofi Girl', 'thumbnail': r're:https://.+\.jpg', 'uploader': 'Lofi Girl', 'live_status': 'is_live', }, 'params': {'skip_download': True}, }, { 'url': 'https://rumble.com/embed/v1amumr', 'info_dict': { 'id': 'v1amumr', 'ext': 'mp4', 'fps': 60, 'title': 'Turning Point USA 2022 Student Action Summit DAY 1 - Rumble Exclusive Live', 'timestamp': 1658518457, 'upload_date': '20220722', 'channel_url': 'https://rumble.com/c/RumbleEvents', 'channel': 'Rumble Events', 'thumbnail': r're:https://.+\.jpg', 'duration': 16427, 'uploader': 'Rumble Events', 'live_status': 'was_live', }, 'params': {'skip_download': True}, }, { 'url': 'https://rumble.com/embed/ufe9n.v5pv5f', 'only_matching': True, }] _WEBPAGE_TESTS = [ { 'note': 'Rumble JS embed', 'url': 'https://therightscoop.com/what-does-9-plus-1-plus-1-equal-listen-to-this-audio-of-attempted-kavanaugh-assassins-call-and-youll-get-it', 'md5': '4701209ac99095592e73dbba21889690', 'info_dict': { 'id': 'v15eqxl', 'ext': 'mp4', 'channel': 'Mr Producer Media', 'duration': 92, 'title': '911 Audio From The Man Who Wanted To Kill Supreme Court Justice Kavanaugh', 'channel_url': 'https://rumble.com/c/RichSementa', 'thumbnail': 
'https://sp.rmbl.ws/s8/1/P/j/f/A/PjfAe.qR4e-small-911-Audio-From-The-Man-Who-.jpg', 'timestamp': 1654892716, 'uploader': 'Mr Producer Media', 'upload_date': '20220610', 'live_status': 'not_live', }, }, ] @classmethod def _extract_embed_urls(cls, url, webpage): embeds = tuple(super()._extract_embed_urls(url, webpage)) if embeds: return embeds return [f'https://rumble.com/embed/{mobj.group("id")}' for mobj in re.finditer( r'<script>[^<]*\bRumble\(\s*"play"\s*,\s*{[^}]*[\'"]?video[\'"]?\s*:\s*[\'"](?P<id>[0-9a-z]+)[\'"]', webpage)] def _real_extract(self, url): video_id = self._match_id(url) video = self._download_json( 'https://rumble.com/embedJS/u3/', video_id, query={'request': 'video', 'ver': 2, 'v': video_id}) sys_msg = traverse_obj(video, ('sys', 'msg')) if sys_msg: self.report_warning(sys_msg, video_id=video_id) if video.get('live') == 0: live_status = 'not_live' if video.get('livestream_has_dvr') is None else 'was_live' elif video.get('live') == 1: live_status = 'is_upcoming' if video.get('livestream_has_dvr') else 'was_live' elif video.get('live') == 2: live_status = 'is_live' else: live_status = None formats = [] for ext, ext_info in (video.get('ua') or {}).items(): if isinstance(ext_info, dict): for height, video_info in ext_info.items(): if not traverse_obj(video_info, ('meta', 'h', {int_or_none})): video_info.setdefault('meta', {})['h'] = height ext_info = ext_info.values() for video_info in ext_info: meta = video_info.get('meta') or {} if not video_info.get('url'): continue if ext == 'hls': if meta.get('live') is True and video.get('live') == 1: live_status = 'post_live' formats.extend(self._extract_m3u8_formats( video_info['url'], video_id, ext='mp4', m3u8_id='hls', fatal=False, live=live_status == 'is_live')) continue timeline = ext == 'timeline' if timeline: ext = determine_ext(video_info['url']) formats.append({ 'ext': ext, 'acodec': 'none' if timeline else None, 'url': video_info['url'], 'format_id': join_nonempty(ext, format_field(meta, 'h', '%sp')), 'format_note': 'Timeline' if timeline else None, 'fps': None if timeline else video.get('fps'), **traverse_obj(meta, { 'tbr': 'bitrate', 'filesize': 'size', 'width': 'w', 'height': 'h', }, expected_type=lambda x: int(x) or None), }) subtitles = { lang: [{ 'url': sub_info['path'], 'name': sub_info.get('language') or '', }] for lang, sub_info in (video.get('cc') or {}).items() if sub_info.get('path') } author = video.get('author') or {} thumbnails = traverse_obj(video, ('t', ..., {'url': 'i', 'width': 'w', 'height': 'h'})) if not thumbnails and video.get('i'): thumbnails = [{'url': video['i']}] if live_status in {'is_live', 'post_live'}: duration = None else: duration = int_or_none(video.get('duration')) return { 'id': video_id, 'title': unescapeHTML(video.get('title')), 'formats': formats, 'subtitles': subtitles, 'thumbnails': thumbnails, 'timestamp': parse_iso8601(video.get('pubDate')), 'channel': author.get('name'), 'channel_url': author.get('url'), 'duration': duration, 'uploader': author.get('name'), 'live_status': live_status, } class RumbleIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?rumble\.com/(?P<id>v(?!ideos)[\w.-]+)[^/]*$' _EMBED_REGEX = [ r'<a class=video-item--a href=(?P<url>/v[\w.-]+\.html)>', r'<a[^>]+class="videostream__link link"[^>]+href=(?P<url>/v[\w.-]+\.html)[^>]*>'] _TESTS = [{ 'add_ie': ['RumbleEmbed'], 'url': 'https://rumble.com/vdmum1-moose-the-dog-helps-girls-dig-a-snow-fort.html', 'md5': '53af34098a7f92c4e51cf0bd1c33f009', 'info_dict': { 'id': 'vb0ofn', 'ext': 'mp4', 'timestamp': 1612662578, 
'uploader': 'LovingMontana', 'channel': 'LovingMontana', 'upload_date': '20210207', 'title': 'Winter-loving dog helps girls dig a snow fort ', 'description': 'Moose the dog is more than happy to help with digging out this epic snow fort. Great job, Moose!', 'channel_url': 'https://rumble.com/c/c-546523', 'thumbnail': r're:https://.+\.jpg', 'duration': 103, 'like_count': int, 'dislike_count': int, 'view_count': int, 'live_status': 'not_live', }, }, { 'url': 'http://www.rumble.com/vDMUM1?key=value', 'only_matching': True, }, { 'note': 'timeline format', 'url': 'https://rumble.com/v2ea9qb-the-u.s.-cannot-hide-this-in-ukraine-anymore-redacted-with-natali-and-clayt.html', 'md5': '40d61fec6c0945bca3d0e1dc1aa53d79', 'params': {'format': 'wv'}, 'info_dict': { 'id': 'v2bou5f', 'ext': 'mp4', 'uploader': 'Redacted News', 'upload_date': '20230322', 'timestamp': 1679445010, 'title': 'The U.S. CANNOT hide this in Ukraine anymore | Redacted with Natali and Clayton Morris', 'duration': 892, 'channel': 'Redacted News', 'description': 'md5:aaad0c5c3426d7a361c29bdaaced7c42', 'channel_url': 'https://rumble.com/c/Redacted', 'live_status': 'not_live', 'thumbnail': 'https://sp.rmbl.ws/s8/1/d/x/2/O/dx2Oi.qR4e-small-The-U.S.-CANNOT-hide-this-i.jpg', 'like_count': int, 'dislike_count': int, 'view_count': int, }, }, { 'url': 'https://rumble.com/v2e7fju-the-covid-twitter-files-drop-protecting-fauci-while-censoring-the-truth-wma.html', 'info_dict': { 'id': 'v2blzyy', 'ext': 'mp4', 'live_status': 'was_live', 'release_timestamp': 1679446804, 'description': 'md5:2ac4908ccfecfb921f8ffa4b30c1e636', 'release_date': '20230322', 'timestamp': 1679445692, 'duration': 4435, 'upload_date': '20230322', 'title': 'The Covid Twitter Files Drop: Protecting Fauci While Censoring The Truth w/Matt Taibbi', 'uploader': 'Kim Iversen', 'channel_url': 'https://rumble.com/c/KimIversen', 'channel': 'Kim Iversen', 'thumbnail': 'https://sp.rmbl.ws/s8/1/6/b/w/O/6bwOi.qR4e-small-The-Covid-Twitter-Files-Dro.jpg', 'like_count': int, 'dislike_count': int, 'view_count': int, }, }] _WEBPAGE_TESTS = [{ 'url': 'https://rumble.com/videos?page=2', 'playlist_mincount': 24, 'info_dict': { 'id': 'videos?page=2', 'title': 'All videos', 'description': 'Browse videos uploaded to Rumble.com', 'age_limit': 0, }, }, { 'url': 'https://rumble.com/browse/live', 'playlist_mincount': 25, 'info_dict': { 'id': 'live', 'title': 'Browse', 'age_limit': 0, }, }, { 'url': 'https://rumble.com/search/video?q=rumble&sort=views', 'playlist_mincount': 24, 'info_dict': { 'id': 'video?q=rumble&sort=views', 'title': 'Search results for: rumble', 'age_limit': 0, }, }] def _real_extract(self, url): page_id = self._match_id(url) webpage = self._download_webpage(url, page_id) url_info = next(RumbleEmbedIE.extract_from_webpage(self._downloader, url, webpage), None) if not url_info: raise UnsupportedError(url) return { '_type': 'url_transparent', 'ie_key': url_info['ie_key'], 'url': url_info['url'], 'release_timestamp': parse_iso8601(self._search_regex( r'(?:Livestream begins|Streamed on):\s+<time datetime="([^"]+)', webpage, 'release date', default=None)), 'view_count': int_or_none(self._search_regex( r'"userInteractionCount"\s*:\s*(\d+)', webpage, 'view count', default=None)), 'like_count': parse_count(self._search_regex( r'<span data-js="rumbles_up_votes">\s*([\d,.KM]+)', webpage, 'like count', default=None)), 'dislike_count': parse_count(self._search_regex( r'<span data-js="rumbles_down_votes">\s*([\d,.KM]+)', webpage, 'dislike count', default=None)), 'description': 
                clean_html(get_element_by_class('media-description', webpage)),
        }


class RumbleChannelIE(InfoExtractor):
    _VALID_URL = r'(?P<url>https?://(?:www\.)?rumble\.com/(?:c|user)/(?P<id>[^&?#$/]+))'

    _TESTS = [{
        'url': 'https://rumble.com/c/Styxhexenhammer666',
        'playlist_mincount': 1160,
        'info_dict': {
            'id': 'Styxhexenhammer666',
        },
    }, {
        'url': 'https://rumble.com/user/goldenpoodleharleyeuna',
        'playlist_mincount': 4,
        'info_dict': {
            'id': 'goldenpoodleharleyeuna',
        },
    }]

    def entries(self, url, playlist_id):
        for page in itertools.count(1):
            try:
                webpage = self._download_webpage(f'{url}?page={page}', playlist_id, note=f'Downloading page {page}')
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 404:
                    break
                raise
            for video_url in traverse_obj(
                get_elements_html_by_class('videostream__link', webpage),
                (..., {extract_attributes}, 'href'),
            ):
                yield self.url_result(urljoin('https://rumble.com', video_url))

    def _real_extract(self, url):
        url, playlist_id = self._match_valid_url(url).groups()
        return self.playlist_result(self.entries(url, playlist_id), playlist_id=playlist_id)

yt-dlp-2024.09.27/yt_dlp/extractor/rutube.py

import itertools

from .common import InfoExtractor
from ..utils import (
    bool_or_none,
    determine_ext,
    int_or_none,
    parse_qs,
    traverse_obj,
    try_get,
    unified_timestamp,
    url_or_none,
)


class RutubeBaseIE(InfoExtractor):
    def _download_api_info(self, video_id, query=None):
        if not query:
            query = {}
        query['format'] = 'json'
        return self._download_json(
            f'http://rutube.ru/api/video/{video_id}/',
            video_id, 'Downloading video JSON',
            'Unable to download video JSON', query=query)

    def _extract_info(self, video, video_id=None, require_title=True):
        title = video['title'] if require_title else video.get('title')

        age_limit = video.get('is_adult')
        if age_limit is not None:
            age_limit = 18 if age_limit is True else 0

        uploader_id = try_get(video, lambda x: x['author']['id'])
        category = try_get(video, lambda x: x['category']['name'])
        description = video.get('description')
        duration = int_or_none(video.get('duration'))

        return {
            'id': video.get('id') or video_id if video_id else video['id'],
            'title': title,
            'description': description,
            'thumbnail': video.get('thumbnail_url'),
            'duration': duration,
            'uploader': try_get(video, lambda x: x['author']['name']),
            'uploader_id': str(uploader_id) if uploader_id else None,
            'timestamp': unified_timestamp(video.get('created_ts')),
            'categories': [category] if category else None,
            'age_limit': age_limit,
            'view_count': int_or_none(video.get('hits')),
            'comment_count': int_or_none(video.get('comments_count')),
            'is_live': bool_or_none(video.get('is_livestream')),
            'chapters': self._extract_chapters_from_description(description, duration),
        }

    def _download_and_extract_info(self, video_id, query=None):
        return self._extract_info(
            self._download_api_info(video_id, query=query), video_id)

    def _download_api_options(self, video_id, query=None):
        if not query:
            query =
{} query['format'] = 'json' return self._download_json( f'http://rutube.ru/api/play/options/{video_id}/', video_id, 'Downloading options JSON', 'Unable to download options JSON', headers=self.geo_verification_headers(), query=query) def _extract_formats(self, options, video_id): formats = [] for format_id, format_url in options['video_balancer'].items(): ext = determine_ext(format_url) if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( format_url, video_id, 'mp4', m3u8_id=format_id, fatal=False)) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( format_url, video_id, f4m_id=format_id, fatal=False)) else: formats.append({ 'url': format_url, 'format_id': format_id, }) for hls_url in traverse_obj(options, ('live_streams', 'hls', ..., 'url', {url_or_none})): formats.extend(self._extract_m3u8_formats(hls_url, video_id, ext='mp4', fatal=False)) return formats def _download_and_extract_formats(self, video_id, query=None): return self._extract_formats( self._download_api_options(video_id, query=query), video_id) class RutubeIE(RutubeBaseIE): IE_NAME = 'rutube' IE_DESC = 'Rutube videos' _VALID_URL = r'https?://rutube\.ru/(?:(?:live/)?video(?:/private)?|(?:play/)?embed)/(?P<id>[\da-z]{32})' _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/(?:play/)?embed/[\da-z]{32}.*?)\1'] _TESTS = [{ 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/', 'md5': 'e33ac625efca66aba86cbec9851f2692', 'info_dict': { 'id': '3eac3b4561676c17df9132a9a1e62e3e', 'ext': 'mp4', 'title': 'Раненный кенгуру забежал в аптеку', 'description': 'http://www.ntdtv.ru ', 'duration': 81, 'uploader': 'NTDRussian', 'uploader_id': '29790', 'timestamp': 1381943602, 'upload_date': '20131016', 'age_limit': 0, 'view_count': int, 'thumbnail': 'http://pic.rutubelist.ru/video/d2/a0/d2a0aec998494a396deafc7ba2c82add.jpg', 'categories': ['Новости и СМИ'], 'chapters': [], }, 'expected_warnings': ['Unable to download f4m'], }, { 'url': 'http://rutube.ru/play/embed/a10e53b86e8f349080f718582ce4c661', 'only_matching': True, }, { 'url': 'http://rutube.ru/embed/a10e53b86e8f349080f718582ce4c661', 'only_matching': True, }, { 'url': 'http://rutube.ru/video/3eac3b4561676c17df9132a9a1e62e3e/?pl_id=4252', 'only_matching': True, }, { 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_type=source', 'only_matching': True, }, { 'url': 'https://rutube.ru/video/private/884fb55f07a97ab673c7d654553e0f48/?p=x2QojCumHTS3rsKHWXN8Lg', 'md5': 'd106225f15d625538fe22971158e896f', 'info_dict': { 'id': '884fb55f07a97ab673c7d654553e0f48', 'ext': 'mp4', 'title': 'Яцуноками, Nioh2', 'description': 'Nioh2: финал сражения с боссом Яцуноками', 'duration': 15, 'uploader': 'mexus', 'uploader_id': '24222106', 'timestamp': 1670646232, 'upload_date': '20221210', 'age_limit': 0, 'view_count': int, 'thumbnail': 'http://pic.rutubelist.ru/video/f2/d4/f2d42b54be0a6e69c1c22539e3152156.jpg', 'categories': ['Видеоигры'], 'chapters': [], }, 'expected_warnings': ['Unable to download f4m'], }, { 'url': 'https://rutube.ru/video/c65b465ad0c98c89f3b25cb03dcc87c6/', 'info_dict': { 'id': 'c65b465ad0c98c89f3b25cb03dcc87c6', 'ext': 'mp4', 'chapters': 'count:4', 'categories': ['Бизнес и предпринимательство'], 'description': 'md5:252feac1305257d8c1bab215cedde75d', 'thumbnail': 'http://pic.rutubelist.ru/video/71/8f/718f27425ea9706073eb80883dd3787b.png', 'duration': 782, 'age_limit': 0, 'uploader_id': '23491359', 'timestamp': 1677153329, 'view_count': int, 'upload_date': '20230223', 'title': 'Бизнес с нуля: найм сотрудников. 
Интервью с директором строительной компании', 'uploader': 'Стас Быков', }, 'expected_warnings': ['Unable to download f4m'], }, { 'url': 'https://rutube.ru/live/video/c58f502c7bb34a8fcdd976b221fca292/', 'info_dict': { 'id': 'c58f502c7bb34a8fcdd976b221fca292', 'ext': 'mp4', 'categories': ['Телепередачи'], 'description': '', 'thumbnail': 'http://pic.rutubelist.ru/video/14/19/14190807c0c48b40361aca93ad0867c7.jpg', 'live_status': 'is_live', 'age_limit': 0, 'uploader_id': '23460655', 'timestamp': 1652972968, 'view_count': int, 'upload_date': '20220519', 'title': r're:Первый канал. Прямой эфир \d{4}-\d{2}-\d{2} \d{2}:\d{2}$', 'uploader': 'Первый канал', }, }, { 'url': 'https://rutube.ru/video/5ab908fccfac5bb43ef2b1e4182256b0/', 'only_matching': True, }, { 'url': 'https://rutube.ru/live/video/private/c58f502c7bb34a8fcdd976b221fca292/', 'only_matching': True, }] @classmethod def suitable(cls, url): return False if RutubePlaylistIE.suitable(url) else super().suitable(url) def _real_extract(self, url): video_id = self._match_id(url) query = parse_qs(url) info = self._download_and_extract_info(video_id, query) info['formats'] = self._download_and_extract_formats(video_id, query) return info class RutubeEmbedIE(RutubeBaseIE): IE_NAME = 'rutube:embed' IE_DESC = 'Rutube embedded videos' _VALID_URL = r'https?://rutube\.ru/(?:video|play)/embed/(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://rutube.ru/video/embed/6722881?vk_puid37=&vk_puid38=', 'info_dict': { 'id': 'a10e53b86e8f349080f718582ce4c661', 'ext': 'mp4', 'timestamp': 1387830582, 'upload_date': '20131223', 'uploader_id': '297833', 'description': 'Видео группы ★http://vk.com/foxkidsreset★ музей Fox Kids и Jetix<br/><br/> восстановлено и сделано в шикоформате subziro89 http://vk.com/subziro89', 'uploader': 'subziro89 ILya', 'title': 'Мистический городок Эйри в Индиан 5 серия озвучка subziro89', }, 'params': { 'skip_download': True, }, }, { 'url': 'http://rutube.ru/play/embed/8083783', 'only_matching': True, }, { # private video 'url': 'https://rutube.ru/play/embed/10631925?p=IbAigKqWd1do4mjaM5XLIQ', 'only_matching': True, }] def _real_extract(self, url): embed_id = self._match_id(url) # Query may contain private videos token and should be passed to API # requests (see #19163) query = parse_qs(url) options = self._download_api_options(embed_id, query) video_id = options['effective_video'] formats = self._extract_formats(options, video_id) info = self._download_and_extract_info(video_id, query) info.update({ 'extractor_key': 'Rutube', 'formats': formats, }) return info class RutubePlaylistBaseIE(RutubeBaseIE): def _next_page_url(self, page_num, playlist_id, *args, **kwargs): return self._PAGE_TEMPLATE % (playlist_id, page_num) def _entries(self, playlist_id, *args, **kwargs): next_page_url = None for pagenum in itertools.count(1): page = self._download_json( next_page_url or self._next_page_url( pagenum, playlist_id, *args, **kwargs), playlist_id, f'Downloading page {pagenum}') results = page.get('results') if not results or not isinstance(results, list): break for result in results: video_url = url_or_none(result.get('video_url')) if not video_url: continue entry = self._extract_info(result, require_title=False) entry.update({ '_type': 'url', 'url': video_url, 'ie_key': RutubeIE.ie_key(), }) yield entry next_page_url = page.get('next') if not next_page_url or not page.get('has_next'): break def _extract_playlist(self, playlist_id, *args, **kwargs): return self.playlist_result( self._entries(playlist_id, *args, **kwargs), playlist_id, 
kwargs.get('playlist_name')) def _real_extract(self, url): return self._extract_playlist(self._match_id(url)) class RutubeTagsIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:tags' IE_DESC = 'Rutube tags' _VALID_URL = r'https?://rutube\.ru/tags/video/(?P<id>\d+)' _TESTS = [{ 'url': 'http://rutube.ru/tags/video/1800/', 'info_dict': { 'id': '1800', }, 'playlist_mincount': 68, }] _PAGE_TEMPLATE = 'http://rutube.ru/api/tags/video/%s/?page=%s&format=json' class RutubeMovieIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:movie' IE_DESC = 'Rutube movies' _VALID_URL = r'https?://rutube\.ru/metainfo/tv/(?P<id>\d+)' _MOVIE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/?format=json' _PAGE_TEMPLATE = 'http://rutube.ru/api/metainfo/tv/%s/video?page=%s&format=json' def _real_extract(self, url): movie_id = self._match_id(url) movie = self._download_json( self._MOVIE_TEMPLATE % movie_id, movie_id, 'Downloading movie JSON') return self._extract_playlist( movie_id, playlist_name=movie.get('name')) class RutubePersonIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:person' IE_DESC = 'Rutube person videos' _VALID_URL = r'https?://rutube\.ru/video/person/(?P<id>\d+)' _TESTS = [{ 'url': 'http://rutube.ru/video/person/313878/', 'info_dict': { 'id': '313878', }, 'playlist_mincount': 37, }] _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' class RutubePlaylistIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:playlist' IE_DESC = 'Rutube playlists' _VALID_URL = r'https?://rutube\.ru/(?:video|(?:play/)?embed)/[\da-z]{32}/\?.*?\bpl_id=(?P<id>\d+)' _TESTS = [{ 'url': 'https://rutube.ru/video/cecd58ed7d531fc0f3d795d51cee9026/?pl_id=3097&pl_type=tag', 'info_dict': { 'id': '3097', }, 'playlist_count': 27, }, { 'url': 'https://rutube.ru/video/10b3a03fc01d5bbcc632a2f3514e8aab/?pl_id=4252&pl_type=source', 'only_matching': True, }] _PAGE_TEMPLATE = 'http://rutube.ru/api/playlist/%s/%s/?page=%s&format=json' @classmethod def suitable(cls, url): from ..utils import int_or_none, parse_qs if not super().suitable(url): return False params = parse_qs(url) return params.get('pl_type', [None])[0] and int_or_none(params.get('pl_id', [None])[0]) def _next_page_url(self, page_num, playlist_id, item_kind): return self._PAGE_TEMPLATE % (item_kind, playlist_id, page_num) def _real_extract(self, url): qs = parse_qs(url) playlist_kind = qs['pl_type'][0] playlist_id = qs['pl_id'][0] return self._extract_playlist(playlist_id, item_kind=playlist_kind) class RutubeChannelIE(RutubePlaylistBaseIE): IE_NAME = 'rutube:channel' IE_DESC = 'Rutube channel' _VALID_URL = r'https?://rutube\.ru/channel/(?P<id>\d+)/videos' _TESTS = [{ 'url': 'https://rutube.ru/channel/639184/videos/', 'info_dict': { 'id': '639184', }, 'playlist_mincount': 133, }] _PAGE_TEMPLATE = 'http://rutube.ru/api/video/person/%s/?page=%s&format=json' 
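# --- Illustrative sketch (not part of the original file) ---
# RutubePlaylistBaseIE._entries() above implements a common pagination
# pattern: request numbered pages from a URL template until the API stops
# returning results or reports has_next=False, preferring the server-supplied
# 'next' URL once one is available. A minimal standalone version of that
# loop, assuming a hypothetical fetch_json(url) callable that returns one
# decoded page, might look like this:
import itertools


def iter_paginated_results(page_template, playlist_id, fetch_json):
    """Yield raw result dicts from a paginated JSON API.

    Pages are assumed to be shaped like the Rutube responses handled above:
    {'results': [...], 'next': <url or None>, 'has_next': <bool>}.
    """
    next_page_url = None
    for page_num in itertools.count(1):
        page = fetch_json(next_page_url or page_template % (playlist_id, page_num))
        results = page.get('results')
        if not results or not isinstance(results, list):
            break  # an empty or malformed page terminates the playlist
        yield from results
        next_page_url = page.get('next')
        if not next_page_url or not page.get('has_next'):
            break
# --- End of sketch ---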
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/rutv.py����������������������������������������������������������0000664�0000000�0000000�00000017553�14675634471�0020246�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re from .common import InfoExtractor from ..utils import ExtractorError, int_or_none, str_to_int class RUTVIE(InfoExtractor): IE_DESC = 'RUTV.RU' _VALID_URL = r'''(?x) https?:// (?:test)?player\.(?:rutv\.ru|vgtrk\.com)/ (?P<path> flash\d+v/container\.swf\?id=| iframe/(?P<type>swf|video|live)/id/| index/iframe/cast_id/ ) (?P<id>\d+) ''' _EMBED_URLS = [ r'<iframe[^>]+?src=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/(?:iframe/(?:swf|video|live)/id|index/iframe/cast_id)/.+?)\1', r'<meta[^>]+?property=(["\'])og:video\1[^>]+?content=(["\'])(?P<url>https?://(?:test)?player\.(?:rutv\.ru|vgtrk\.com)/flash\d+v/container\.swf\?id=.+?\2)', ] _TESTS = [ { 'url': 'http://player.rutv.ru/flash2v/container.swf?id=774471&sid=kultura&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972347/video_id/978186/brand_id/31724', 'info_dict': { 'id': '774471', 'ext': 'mp4', 'title': 'Монологи на все времена', 'description': 'md5:18d8b5e6a41fb1faa53819471852d5d5', 'duration': 2906, }, 'params': { # m3u8 download 'skip_download': True, }, }, { 'url': 'https://player.vgtrk.com/flash2v/container.swf?id=774016&sid=russiatv&fbv=true&isPlay=true&ssl=false&i=560&acc_video_id=episode_id/972098/video_id/977760/brand_id/57638', 'info_dict': { 'id': '774016', 'ext': 'mp4', 'title': 'Чужой в семье Сталина', 'description': '', 'duration': 2539, }, 'params': { # m3u8 download 'skip_download': True, }, }, { 'url': 'http://player.rutv.ru/iframe/swf/id/766888/sid/hitech/?acc_video_id=4000', 'info_dict': { 'id': '766888', 'ext': 'mp4', 'title': 'Вести.net: интернет-гиганты начали перетягивание программных "одеял"', 'description': 'md5:65ddd47f9830c4f42ed6475f8730c995', 'duration': 279, }, 'params': { # m3u8 download 'skip_download': True, }, }, { 'url': 'http://player.rutv.ru/iframe/video/id/771852/start_zoom/true/showZoomBtn/false/sid/russiatv/?acc_video_id=episode_id/970443/video_id/975648/brand_id/5169', 'info_dict': { 'id': '771852', 'ext': 'mp4', 'title': 'Прямой эфир. Жертвы загадочной болезни: смерть от старости в 17 лет', 'description': 'md5:b81c8c55247a4bd996b43ce17395b2d8', 'duration': 3096, }, 'params': { # m3u8 download 'skip_download': True, }, }, { 'url': 'http://player.rutv.ru/iframe/live/id/51499/showZoomBtn/false/isPlay/true/sid/sochi2014', 'info_dict': { 'id': '51499', 'ext': 'flv', 'title': 'Сочи-2014. Биатлон. Индивидуальная гонка. Мужчины ', 'description': 'md5:9e0ed5c9d2fa1efbfdfed90c9a6d179c', }, 'skip': 'Translation has finished', }, { 'url': 'http://player.rutv.ru/iframe/live/id/21/showZoomBtn/false/isPlay/true/', 'info_dict': { 'id': '21', 'ext': 'mp4', 'title': 're:^Россия 24. 
Прямой эфир [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$', 'is_live': True, }, 'params': { # m3u8 download 'skip_download': True, }, }, { 'url': 'https://testplayer.vgtrk.com/iframe/live/id/19201/showZoomBtn/false/isPlay/true/', 'only_matching': True, }, ] def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') video_path = mobj.group('path') if re.match(r'flash\d+v', video_path): video_type = 'video' elif video_path.startswith('iframe'): video_type = mobj.group('type') if video_type == 'swf': video_type = 'video' elif video_path.startswith('index/iframe/cast_id'): video_type = 'live' is_live = video_type == 'live' json_data = self._download_json( 'http://player.vgtrk.com/iframe/data{}/id/{}'.format('live' if is_live else 'video', video_id), video_id, 'Downloading JSON') if json_data['errors']: raise ExtractorError('{} said: {}'.format(self.IE_NAME, json_data['errors']), expected=True) playlist = json_data['data']['playlist'] medialist = playlist['medialist'] media = medialist[0] if media['errors']: raise ExtractorError('{} said: {}'.format(self.IE_NAME, media['errors']), expected=True) view_count = int_or_none(playlist.get('count_views')) priority_transport = playlist['priority_transport'] thumbnail = media['picture'] width = int_or_none(media['width']) height = int_or_none(media['height']) description = media['anons'] title = media['title'] duration = int_or_none(media.get('duration')) formats = [] subtitles = {} for transport, links in media['sources'].items(): for quality, url in links.items(): preference = -1 if priority_transport == transport else -2 if transport == 'rtmp': mobj = re.search(r'^(?P<url>rtmp://[^/]+/(?P<app>.+))/(?P<playpath>.+)$', url) if not mobj: continue fmt = { 'url': mobj.group('url'), 'play_path': mobj.group('playpath'), 'app': mobj.group('app'), 'page_url': 'http://player.rutv.ru', 'player_url': 'http://player.rutv.ru/flash3v/osmf.swf?i=22', 'rtmp_live': True, 'ext': 'flv', 'vbr': str_to_int(quality), } elif transport == 'm3u8': fmt, subs = self._extract_m3u8_formats_and_subtitles( url, video_id, 'mp4', quality=preference, m3u8_id='hls') formats.extend(fmt) self._merge_subtitles(subs, target=subtitles) continue else: fmt = { 'url': url, } fmt.update({ 'width': int_or_none(quality, default=height, invscale=width, scale=height), 'height': int_or_none(quality, default=height), 'format_id': f'{transport}-{quality}', 'source_preference': preference, }) formats.append(fmt) return { 'id': video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'view_count': view_count, 'duration': duration, 'formats': formats, 'subtitles': subtitles, 'is_live': is_live, '_format_sort_fields': ('source', ), } �����������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/ruutu.py���������������������������������������������������������0000664�0000000�0000000�00000025245�14675634471�0020427�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import json import re import urllib.parse from .common import InfoExtractor from ..utils import ( ExtractorError, determine_ext, find_xpath_attr, int_or_none, traverse_obj, 
try_call, unified_strdate, url_or_none, xpath_attr, xpath_text, ) class RuutuIE(InfoExtractor): _VALID_URL = r'''(?x) https?:// (?: (?:www\.)?(?:ruutu|supla)\.fi/(?:video|supla|audio)/| static\.nelonenmedia\.fi/player/misc/embed_player\.html\?.*?\bnid= ) (?P<id>\d+) ''' _TESTS = [ { 'url': 'http://www.ruutu.fi/video/2058907', 'md5': 'ab2093f39be1ca8581963451b3c0234f', 'info_dict': { 'id': '2058907', 'ext': 'mp4', 'title': 'Oletko aina halunnut tietää mitä tapahtuu vain hetki ennen lähetystä? - Nyt se selvisi!', 'description': 'md5:cfc6ccf0e57a814360df464a91ff67d6', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 114, 'age_limit': 0, 'upload_date': '20150508', }, }, { 'url': 'http://www.ruutu.fi/video/2057306', 'md5': '065a10ae4d5b8cfd9d0c3d332465e3d9', 'info_dict': { 'id': '2057306', 'ext': 'mp4', 'title': 'Superpesis: katso koko kausi Ruudussa', 'description': 'md5:bfb7336df2a12dc21d18fa696c9f8f23', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 40, 'age_limit': 0, 'upload_date': '20150507', 'series': 'Superpesis', 'categories': ['Urheilu'], }, }, { 'url': 'http://www.supla.fi/supla/2231370', 'md5': 'df14e782d49a2c0df03d3be2a54ef949', 'info_dict': { 'id': '2231370', 'ext': 'mp4', 'title': 'Osa 1: Mikael Jungner', 'description': 'md5:7d90f358c47542e3072ff65d7b1bcffe', 'thumbnail': r're:^https?://.*\.jpg$', 'age_limit': 0, 'upload_date': '20151012', 'series': 'Läpivalaisu', }, }, # Episode where <SourceFile> is "NOT-USED", but has other # downloadable sources available. { 'url': 'http://www.ruutu.fi/video/3193728', 'only_matching': True, }, { # audio podcast 'url': 'https://www.supla.fi/supla/3382410', 'md5': 'b9d7155fed37b2ebf6021d74c4b8e908', 'info_dict': { 'id': '3382410', 'ext': 'mp3', 'title': 'Mikä ihmeen poltergeist?', 'description': 'md5:bbb6963df17dfd0ecd9eb9a61bf14b52', 'thumbnail': r're:^https?://.*\.jpg$', 'age_limit': 0, 'upload_date': '20190320', 'series': 'Mysteeritarinat', 'duration': 1324, }, 'expected_warnings': [ 'HTTP Error 502: Bad Gateway', 'Failed to download m3u8 information', ], }, { 'url': 'http://www.supla.fi/audio/2231370', 'only_matching': True, }, { 'url': 'https://static.nelonenmedia.fi/player/misc/embed_player.html?nid=3618790', 'only_matching': True, }, { # episode 'url': 'https://www.ruutu.fi/video/3401964', 'info_dict': { 'id': '3401964', 'ext': 'mp4', 'title': 'Temptation Island Suomi - Kausi 5 - Jakso 17', 'description': 'md5:87cf01d5e1e88adf0c8a2937d2bd42ba', 'thumbnail': r're:^https?://.*\.jpg$', 'duration': 2582, 'age_limit': 12, 'upload_date': '20190508', 'series': 'Temptation Island Suomi', 'season_number': 5, 'episode_number': 17, 'categories': ['Reality ja tositapahtumat', 'Kotimaiset suosikit', 'Romantiikka ja parisuhde'], }, 'params': { 'skip_download': True, }, }, { # premium 'url': 'https://www.ruutu.fi/video/3618715', 'only_matching': True, }, ] _API_BASE = 'https://gatling.nelonenmedia.fi' @classmethod def _extract_embed_urls(cls, url, webpage): # nelonen.fi settings = try_call( lambda: json.loads(re.search( r'jQuery\.extend\(Drupal\.settings, ({.+?})\);', webpage).group(1), strict=False)) if settings: video_id = traverse_obj(settings, ( 'mediaCrossbowSettings', 'file', 'field_crossbow_video_id', 'und', 0, 'value')) if video_id: return [f'http://www.ruutu.fi/video/{video_id}'] # hs.fi and is.fi settings = try_call( lambda: json.loads(re.search( '(?s)<script[^>]+id=[\'"]__NEXT_DATA__[\'"][^>]*>([^<]+)</script>', webpage).group(1), strict=False)) if settings: video_ids = set(traverse_obj(settings, ( 'props', 'pageProps', 'page', 
'assetData', 'splitBody', ..., 'video', 'sourceId')) or []) if video_ids: return [f'http://www.ruutu.fi/video/{v}' for v in video_ids] video_id = traverse_obj(settings, ( 'props', 'pageProps', 'page', 'assetData', 'mainVideo', 'sourceId')) if video_id: return [f'http://www.ruutu.fi/video/{video_id}'] def _real_extract(self, url): video_id = self._match_id(url) video_xml = self._download_xml( f'{self._API_BASE}/media-xml-cache', video_id, query={'id': video_id}) formats = [] processed_urls = [] def extract_formats(node): for child in node: if child.tag.endswith('Files'): extract_formats(child) elif child.tag.endswith('File'): video_url = child.text if (not video_url or video_url in processed_urls or any(p in video_url for p in ('NOT_USED', 'NOT-USED'))): continue processed_urls.append(video_url) ext = determine_ext(video_url) auth_video_url = url_or_none(self._download_webpage( f'{self._API_BASE}/auth/access/v2', video_id, note=f'Downloading authenticated {ext} stream URL', fatal=False, query={'stream': video_url})) if auth_video_url: processed_urls.append(auth_video_url) video_url = auth_video_url if ext == 'm3u8': formats.extend(self._extract_m3u8_formats( video_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls', fatal=False)) elif ext == 'f4m': formats.extend(self._extract_f4m_formats( video_url, video_id, f4m_id='hds', fatal=False)) elif ext == 'mpd': # video-only and audio-only streams are of different # duration resulting in out of sync issue continue formats.extend(self._extract_mpd_formats( video_url, video_id, mpd_id='dash', fatal=False)) elif ext == 'mp3' or child.tag == 'AudioMediaFile': formats.append({ 'format_id': 'audio', 'url': video_url, 'vcodec': 'none', }) else: proto = urllib.parse.urlparse(video_url).scheme if not child.tag.startswith('HTTP') and proto != 'rtmp': continue preference = -1 if proto == 'rtmp' else 1 label = child.get('label') tbr = int_or_none(child.get('bitrate')) format_id = f'{proto}-{label if label else tbr}' if label or tbr else proto if not self._is_valid_url(video_url, video_id, format_id): continue width, height = (int_or_none(x) for x in child.get('resolution', 'x').split('x')[:2]) formats.append({ 'format_id': format_id, 'url': video_url, 'width': width, 'height': height, 'tbr': tbr, 'preference': preference, }) extract_formats(video_xml.find('./Clip')) def pv(name): value = try_call(lambda: find_xpath_attr( video_xml, './Clip/PassthroughVariables/variable', 'name', name).get('value')) if value != 'NA': return value or None if not formats: if (not self.get_param('allow_unplayable_formats') and xpath_text(video_xml, './Clip/DRM', default=None)): self.report_drm(video_id) ns_st_cds = pv('ns_st_cds') if ns_st_cds != 'free': raise ExtractorError(f'This video is {ns_st_cds}.', expected=True) themes = pv('themes') return { 'id': video_id, 'title': xpath_attr(video_xml, './/Behavior/Program', 'program_name', 'title', fatal=True), 'description': xpath_attr(video_xml, './/Behavior/Program', 'description', 'description'), 'thumbnail': xpath_attr(video_xml, './/Behavior/Startpicture', 'href', 'thumbnail'), 'duration': int_or_none(xpath_text(video_xml, './/Runtime', 'duration')) or int_or_none(pv('runtime')), 'age_limit': int_or_none(xpath_text(video_xml, './/AgeLimit', 'age limit')), 'upload_date': unified_strdate(pv('date_start')), 'series': pv('series_name'), 'season_number': int_or_none(pv('season_number')), 'episode_number': int_or_none(pv('episode_number')), 'categories': themes.split(',') if themes else None, 'formats': formats, } 
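# --- Illustrative sketch (not part of the original file) ---
# extract_formats() above walks Ruutu's <Clip> XML recursively: any element
# whose tag ends in "Files" is treated as a container to descend into, while
# elements whose tag ends in "File" carry one stream URL, with placeholder
# "NOT_USED"/"NOT-USED" entries skipped. A self-contained toy version of that
# traversal, using a made-up XML sample, could look like:
import xml.etree.ElementTree as ET

_SAMPLE_CLIP = '''<Clip>
  <HTTPMediaFiles>
    <HTTPMediaFile>https://example.invalid/stream.m3u8</HTTPMediaFile>
    <HTTPMediaFile>NOT_USED</HTTPMediaFile>
  </HTTPMediaFiles>
</Clip>'''


def collect_stream_urls(node, seen=None):
    seen = [] if seen is None else seen
    for child in node:
        if child.tag.endswith('Files'):
            collect_stream_urls(child, seen)  # recurse into container nodes
        elif child.tag.endswith('File'):
            url = child.text
            if url and url not in seen and 'NOT_USED' not in url and 'NOT-USED' not in url:
                seen.append(url)
    return seen


assert collect_stream_urls(ET.fromstring(_SAMPLE_CLIP)) == ['https://example.invalid/stream.m3u8']
# --- End of sketch ---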
�����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/ruv.py�����������������������������������������������������������0000664�0000000�0000000�00000015311�14675634471�0020050�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( determine_ext, parse_duration, traverse_obj, unified_timestamp, ) class RuvIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?ruv\.is/(?:sarpurinn/[^/]+|node)/(?P<id>[^/]+(?:/\d+)?)' _TESTS = [{ # m3u8 'url': 'http://ruv.is/sarpurinn/ruv-aukaras/fh-valur/20170516', 'md5': '66347652f4e13e71936817102acc1724', 'info_dict': { 'id': '1144499', 'display_id': 'fh-valur/20170516', 'ext': 'mp4', 'title': 'FH - Valur', 'description': 'Bein útsending frá 3. leik FH og Vals í úrslitum Olísdeildar karla í handbolta.', 'timestamp': 1494963600, 'upload_date': '20170516', }, }, { # mp3 'url': 'http://ruv.is/sarpurinn/ras-2/morgunutvarpid/20170619', 'md5': '395ea250c8a13e5fdb39d4670ef85378', 'info_dict': { 'id': '1153630', 'display_id': 'morgunutvarpid/20170619', 'ext': 'mp3', 'title': 'Morgunútvarpið', 'description': 'md5:a4cf1202c0a1645ca096b06525915418', 'timestamp': 1497855000, 'upload_date': '20170619', }, }, { 'url': 'http://ruv.is/sarpurinn/ruv/frettir/20170614', 'only_matching': True, }, { 'url': 'http://www.ruv.is/node/1151854', 'only_matching': True, }, { 'url': 'http://ruv.is/sarpurinn/klippa/secret-soltice-hefst-a-morgun', 'only_matching': True, }, { 'url': 'http://ruv.is/sarpurinn/ras-1/morgunvaktin/20170619', 'only_matching': True, }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id) title = self._og_search_title(webpage) FIELD_RE = r'video\.%s\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1' media_url = self._html_search_regex( FIELD_RE % 'src', webpage, 'video URL', group='url') video_id = self._search_regex( r'<link\b[^>]+\bhref=["\']https?://www\.ruv\.is/node/(\d+)', webpage, 'video id', default=display_id) ext = determine_ext(media_url) if ext == 'm3u8': formats = self._extract_m3u8_formats( media_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls') elif ext == 'mp3': formats = [{ 'format_id': 'mp3', 'url': media_url, 'vcodec': 'none', }] else: formats = [{ 'url': media_url, }] description = self._og_search_description(webpage, default=None) thumbnail = self._og_search_thumbnail( webpage, default=None) or self._search_regex( FIELD_RE % 'poster', webpage, 'thumbnail', fatal=False) timestamp = unified_timestamp(self._html_search_meta( 'article:published_time', webpage, 'timestamp', fatal=False)) return { 'id': video_id, 'display_id': display_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'timestamp': timestamp, 'formats': formats, } class RuvSpilaIE(InfoExtractor): IE_NAME = 'ruv.is:spila' _VALID_URL = 
r'https?://(?:www\.)?ruv\.is/(?:(?:sjon|ut)varp|(?:krakka|ung)ruv)/spila/.+/(?P<series_id>[0-9]+)/(?P<id>[a-z0-9]+)' _TESTS = [{ 'url': 'https://www.ruv.is/sjonvarp/spila/ithrottir/30657/9jcnd4', 'info_dict': { 'id': '9jcnd4', 'ext': 'mp4', 'title': '01.02.2022', 'chapters': 'count:4', 'timestamp': 1643743500, 'upload_date': '20220201', 'thumbnail': 'https://d38kdhuogyllre.cloudfront.net/fit-in/1960x/filters:quality(65)/hd_posters/94boog-iti3jg.jpg', 'description': 'Íþróttafréttir.', 'age_limit': 0, }, }, { 'url': 'https://www.ruv.is/utvarp/spila/i-ljosi-sogunnar/23795/7hqkre', 'info_dict': { 'id': '7hqkre', 'ext': 'mp3', 'thumbnail': 'https://d38kdhuogyllre.cloudfront.net/fit-in/1960x/filters:quality(65)/hd_posters/7hqkre-7uepao.jpg', 'description': 'md5:8d7046549daff35e9a3190dc9901a120', 'chapters': [], 'upload_date': '20220204', 'timestamp': 1643965500, 'title': 'Nellie Bly II', 'age_limit': 0, }, }, { 'url': 'https://www.ruv.is/ungruv/spila/ungruv/28046/8beuph', 'only_matching': True, }, { 'url': 'https://www.ruv.is/krakkaruv/spila/krakkafrettir/30712/9jbgb0', 'only_matching': True, }] def _real_extract(self, url): display_id, series_id = self._match_valid_url(url).group('id', 'series_id') program = self._download_json( 'https://www.ruv.is/gql/', display_id, query={'query': '''{ Program(id: %s){ title image description short_description episodes(id: {value: "%s"}) { rating title duration file image firstrun description clips { time text } subtitles { name value } } } }''' % (series_id, display_id)})['data']['Program'] # noqa: UP031 episode = program['episodes'][0] subs = {} for trk in episode.get('subtitles'): if trk.get('name') and trk.get('value'): subs.setdefault(trk['name'], []).append({'url': trk['value'], 'ext': 'vtt'}) media_url = episode['file'] if determine_ext(media_url) == 'm3u8': formats = self._extract_m3u8_formats(media_url, display_id) else: formats = [{'url': media_url}] clips = [ {'start_time': parse_duration(c.get('time')), 'title': c.get('text')} for c in episode.get('clips') or []] return { 'id': display_id, 'title': traverse_obj(program, ('episodes', 0, 'title'), 'title'), 'description': traverse_obj( program, ('episodes', 0, 'description'), 'description', 'short_description', expected_type=lambda x: x or None), 'subtitles': subs, 'thumbnail': episode.get('image', '').replace('$$IMAGESIZE$$', '1960') or None, 'timestamp': unified_timestamp(episode.get('firstrun')), 'formats': formats, 'age_limit': episode.get('rating'), 'chapters': clips, } �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/s4c.py�����������������������������������������������������������0000664�0000000�0000000�00000007752�14675634471�0017737�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import traverse_obj, url_or_none class S4CIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/programme/(?P<id>\d+)' _TESTS = [{ 'url': 
'https://www.s4c.cymru/clic/programme/861362209', 'info_dict': { 'id': '861362209', 'ext': 'mp4', 'title': 'Y Swn', 'description': 'md5:f7681a30e4955b250b3224aa9fe70cf0', 'duration': 5340, 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Y_Swn_2023S4C_099_ii.jpg', }, }, { 'url': 'https://www.s4c.cymru/clic/programme/856636948', 'info_dict': { 'id': '856636948', 'ext': 'mp4', 'title': 'Am Dro', 'duration': 2880, 'description': 'md5:100d8686fc9a632a0cb2db52a3433ffe', 'thumbnail': 'https://www.s4c.cymru/amg/1920x1080/Am_Dro_2022-23S4C_P6_4005.jpg', }, }] def _real_extract(self, url): video_id = self._match_id(url) details = self._download_json( f'https://www.s4c.cymru/df/full_prog_details?lang=e&programme_id={video_id}', video_id, fatal=False) player_config = self._download_json( 'https://player-api.s4c-cdn.co.uk/player-configuration/prod', video_id, query={ 'programme_id': video_id, 'signed': '0', 'lang': 'en', 'mode': 'od', 'appId': 'clic', 'streamName': '', }, note='Downloading player config JSON') subtitles = {} for sub in traverse_obj(player_config, ('subtitles', lambda _, v: url_or_none(v['0']))): subtitles.setdefault(sub.get('3', 'en'), []).append({ 'url': sub['0'], 'name': sub.get('1'), }) m3u8_url = self._download_json( 'https://player-api.s4c-cdn.co.uk/streaming-urls/prod', video_id, query={ 'mode': 'od', 'application': 'clic', 'region': 'WW', 'extra': 'false', 'thirdParty': 'false', 'filename': player_config['filename'], }, note='Downloading streaming urls JSON')['hls'] return { 'id': video_id, 'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4', m3u8_id='hls'), 'subtitles': subtitles, 'thumbnail': url_or_none(player_config.get('poster')), **traverse_obj(details, ('full_prog_details', 0, { 'title': (('programme_title', 'series_title'), {str}), 'description': ('full_billing', {str.strip}), 'duration': ('duration', {lambda x: int(x) * 60}), }), get_all=False), } class S4CSeriesIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?s4c\.cymru/clic/series/(?P<id>\d+)' _TESTS = [{ 'url': 'https://www.s4c.cymru/clic/series/864982911', 'playlist_mincount': 6, 'info_dict': { 'id': '864982911', 'title': 'Iaith ar Daith', }, }, { 'url': 'https://www.s4c.cymru/clic/series/866852587', 'playlist_mincount': 8, 'info_dict': { 'id': '866852587', 'title': 'FFIT Cymru', }, }] def _real_extract(self, url): series_id = self._match_id(url) series_details = self._download_json( 'https://www.s4c.cymru/df/series_details', series_id, query={ 'lang': 'e', 'series_id': series_id, 'show_prog_in_series': 'Y', }, note='Downloading series details JSON') return self.playlist_result( [self.url_result(f'https://www.s4c.cymru/clic/programme/{episode_id}', S4CIE, episode_id) for episode_id in traverse_obj(series_details, ('other_progs_in_series', ..., 'id'))], series_id, traverse_obj(series_details, ('full_prog_details', 0, 'series_title', {str}))) ����������������������yt-dlp-2024.09.27/yt_dlp/extractor/safari.py��������������������������������������������������������0000664�0000000�0000000�00000022777�14675634471�0020517�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import json import re import urllib.parse from .common import InfoExtractor from ..utils import ( ExtractorError, update_url_query, ) 
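# --- Illustrative sketch (not part of the original file) ---
# The login flow in SafariBaseIE below hinges on pulling the `next` query
# parameter out of the redirect URL returned by the login-check endpoint and
# resolving it against the API host before posting the credentials. In
# isolation, with a made-up redirect URL, that step is just:
import urllib.parse  # already imported above; repeated so the sketch is self-contained

_redirect = 'https://www.oreilly.com/member/auth/login/?next=%2Fapi%2Fv1%2Fauth%2Fcomplete%2F'
_qs = urllib.parse.parse_qs(urllib.parse.urlparse(_redirect).query)
_next_uri = urllib.parse.urljoin('https://api.oreilly.com', _qs['next'][0])
assert _next_uri == 'https://api.oreilly.com/api/v1/auth/complete/'
# --- End of sketch ---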
class SafariBaseIE(InfoExtractor): _LOGIN_URL = 'https://learning.oreilly.com/accounts/login/' _NETRC_MACHINE = 'safari' _API_BASE = 'https://learning.oreilly.com/api/v1' _API_FORMAT = 'json' LOGGED_IN = False def _perform_login(self, username, password): _, urlh = self._download_webpage_handle( 'https://learning.oreilly.com/accounts/login-check/', None, 'Downloading login page') def is_logged(urlh): return 'learning.oreilly.com/home/' in urlh.url if is_logged(urlh): self.LOGGED_IN = True return redirect_url = urlh.url parsed_url = urllib.parse.urlparse(redirect_url) qs = urllib.parse.parse_qs(parsed_url.query) next_uri = urllib.parse.urljoin( 'https://api.oreilly.com', qs['next'][0]) auth, urlh = self._download_json_handle( 'https://www.oreilly.com/member/auth/login/', None, 'Logging in', data=json.dumps({ 'email': username, 'password': password, 'redirect_uri': next_uri, }).encode(), headers={ 'Content-Type': 'application/json', 'Referer': redirect_url, }, expected_status=400) credentials = auth.get('credentials') if (not auth.get('logged_in') and not auth.get('redirect_uri') and credentials): raise ExtractorError( f'Unable to login: {credentials}', expected=True) # oreilly serves two same instances of the following cookies # in Set-Cookie header and expects first one to be actually set for cookie in ('groot_sessionid', 'orm-jwt', 'orm-rt'): self._apply_first_set_cookie_header(urlh, cookie) _, urlh = self._download_webpage_handle( auth.get('redirect_uri') or next_uri, None, 'Completing login') if is_logged(urlh): self.LOGGED_IN = True return raise ExtractorError('Unable to log in') class SafariIE(SafariBaseIE): IE_NAME = 'safari' IE_DESC = 'safaribooksonline.com online video' _VALID_URL = r'''(?x) https?:// (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/ (?: library/view/[^/]+/(?P<course_id>[^/]+)/(?P<part>[^/?\#&]+)\.html| videos/[^/]+/[^/]+/(?P<reference_id>[^-]+-[^/?\#&]+) ) ''' _TESTS = [{ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/part00.html', 'md5': 'dcc5a425e79f2564148652616af1f2a3', 'info_dict': { 'id': '0_qbqx90ic', 'ext': 'mp4', 'title': 'Introduction to Hadoop Fundamentals LiveLessons', 'timestamp': 1437758058, 'upload_date': '20150724', 'uploader_id': 'stork', }, }, { # non-digits in course id 'url': 'https://www.safaribooksonline.com/library/view/create-a-nodejs/100000006A0210/part00.html', 'only_matching': True, }, { 'url': 'https://www.safaribooksonline.com/library/view/learning-path-red/9780134664057/RHCE_Introduction.html', 'only_matching': True, }, { 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314/9780134217314-PYMC_13_00', 'only_matching': True, }, { 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838/9780133392838-00_SeriesIntro', 'only_matching': True, }, { 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/00_SeriesIntro.html', 'only_matching': True, }] _PARTNER_ID = '1926081' _UICONF_ID = '29375172' def _real_extract(self, url): mobj = self._match_valid_url(url) reference_id = mobj.group('reference_id') if reference_id: video_id = reference_id partner_id = self._PARTNER_ID ui_id = self._UICONF_ID else: video_id = '{}-{}'.format(mobj.group('course_id'), mobj.group('part')) webpage, urlh = self._download_webpage_handle(url, video_id) mobj = re.match(self._VALID_URL, urlh.url) reference_id = mobj.group('reference_id') if not reference_id: reference_id = self._search_regex( 
r'data-reference-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura reference id', group='id') partner_id = self._search_regex( r'data-partner-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura widget id', default=self._PARTNER_ID, group='id') ui_id = self._search_regex( r'data-ui-id=(["\'])(?P<id>(?:(?!\1).)+)\1', webpage, 'kaltura uiconf id', default=self._UICONF_ID, group='id') query = { 'wid': f'_{partner_id}', 'uiconf_id': ui_id, 'flashvars[referenceId]': reference_id, } if self.LOGGED_IN: kaltura_session = self._download_json( f'{self._API_BASE}/player/kaltura_session/?reference_id={reference_id}', video_id, 'Downloading kaltura session JSON', 'Unable to download kaltura session JSON', fatal=False, headers={'Accept': 'application/json'}) if kaltura_session: session = kaltura_session.get('session') if session: query['flashvars[ks]'] = session return self.url_result(update_url_query( 'https://cdnapisec.kaltura.com/html5/html5lib/v2.37.1/mwEmbedFrame.php', query), 'Kaltura') class SafariApiIE(SafariBaseIE): IE_NAME = 'safari:api' _VALID_URL = r'https?://(?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/api/v1/book/(?P<course_id>[^/]+)/chapter(?:-content)?/(?P<part>[^/?#&]+)\.html' _TESTS = [{ 'url': 'https://www.safaribooksonline.com/api/v1/book/9780133392838/chapter/part00.html', 'only_matching': True, }, { 'url': 'https://www.safaribooksonline.com/api/v1/book/9780134664057/chapter/RHCE_Introduction.html', 'only_matching': True, }] def _real_extract(self, url): mobj = self._match_valid_url(url) part = self._download_json( url, '{}/{}'.format(mobj.group('course_id'), mobj.group('part')), 'Downloading part JSON') web_url = part['web_url'] if 'library/view' in web_url: web_url = web_url.replace('library/view', 'videos') natural_keys = part['natural_key'] web_url = f'{web_url.rsplit("/", 1)[0]}/{natural_keys[0]}-{natural_keys[1][:-5]}' return self.url_result(web_url, SafariIE.ie_key()) class SafariCourseIE(SafariBaseIE): IE_NAME = 'safari:course' IE_DESC = 'safaribooksonline.com online courses' _VALID_URL = r'''(?x) https?:// (?: (?:www\.)?(?:safaribooksonline|(?:learning\.)?oreilly)\.com/ (?: library/view/[^/]+| api/v1/book| videos/[^/]+ )| techbus\.safaribooksonline\.com ) /(?P<id>[^/]+) ''' _TESTS = [{ 'url': 'https://www.safaribooksonline.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', 'info_dict': { 'id': '9780133392838', 'title': 'Hadoop Fundamentals LiveLessons', }, 'playlist_count': 22, 'skip': 'Requires safaribooksonline account credentials', }, { 'url': 'https://www.safaribooksonline.com/api/v1/book/9781449396459/?override_format=json', 'only_matching': True, }, { 'url': 'http://techbus.safaribooksonline.com/9780134426365', 'only_matching': True, }, { 'url': 'https://www.safaribooksonline.com/videos/python-programming-language/9780134217314', 'only_matching': True, }, { 'url': 'https://learning.oreilly.com/videos/hadoop-fundamentals-livelessons/9780133392838', 'only_matching': True, }, { 'url': 'https://www.oreilly.com/library/view/hadoop-fundamentals-livelessons/9780133392838/', 'only_matching': True, }] @classmethod def suitable(cls, url): return (False if SafariIE.suitable(url) or SafariApiIE.suitable(url) else super().suitable(url)) def _real_extract(self, url): course_id = self._match_id(url) course_json = self._download_json( f'{self._API_BASE}/book/{course_id}/?override_format={self._API_FORMAT}', course_id, 'Downloading course JSON') if 'chapters' not in course_json: raise ExtractorError( f'No chapters found for course {course_id}', 
expected=True) entries = [ self.url_result(chapter, SafariApiIE.ie_key()) for chapter in course_json['chapters']] course_title = course_json['title'] return self.playlist_result(entries, course_id, course_title) �yt-dlp-2024.09.27/yt_dlp/extractor/saitosan.py������������������������������������������������������0000664�0000000�0000000�00000005702�14675634471�0021060�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ExtractorError, try_get class SaitosanIE(InfoExtractor): _WORKING = False IE_NAME = 'Saitosan' _VALID_URL = r'https?://(?:www\.)?saitosan\.net/bview.html\?id=(?P<id>[0-9]+)' _TESTS = [{ 'url': 'http://www.saitosan.net/bview.html?id=10031846', 'info_dict': { 'id': '10031846', 'ext': 'mp4', 'title': '井下原 和弥', 'uploader': '井下原 和弥', 'thumbnail': 'http://111.171.196.85:8088/921f916f-7f55-4c97-b92e-5d9d0fef8f5f/thumb', 'is_live': True, }, 'params': { # m3u8 download 'skip_download': True, }, 'skip': 'Broadcasts are ephemeral', }, { 'url': 'http://www.saitosan.net/bview.html?id=10031795', 'info_dict': { 'id': '10031795', 'ext': 'mp4', 'title': '橋本', 'uploader': '橋本', 'thumbnail': 'http://111.171.196.85:8088/1a3933e1-a01a-483b-8931-af15f37f8082/thumb', 'is_live': True, }, 'params': { # m3u8 download 'skip_download': True, }, 'skip': 'Broadcasts are ephemeral', }] def _real_extract(self, url): b_id = self._match_id(url) base = 'http://hankachi.saitosan-api.net:8002/socket.io/?transport=polling&EIO=3' sid = self._download_socket_json(base, b_id, note='Opening socket').get('sid') base += '&sid=' + sid self._download_webpage(base, b_id, note='Polling socket') payload = f'420["room_start_join",{{"room_id":"{b_id}"}}]' payload = f'{len(payload)}:{payload}' self._download_webpage(base, b_id, data=payload, note='Polling socket with payload') response = self._download_socket_json(base, b_id, note='Polling socket') if not response.get('ok'): err = response.get('error') or {} raise ExtractorError( '{} said: {} - {}'.format(self.IE_NAME, err.get('code', '?'), err.get('msg', 'Unknown')) if err else 'The socket reported that the broadcast could not be joined. 
Maybe it\'s offline or the URL is incorrect', expected=True, video_id=b_id) self._download_webpage(base, b_id, data='26:421["room_finish_join",{}]', note='Polling socket') b_data = self._download_socket_json(base, b_id, note='Getting broadcast metadata from socket') m3u8_url = b_data.get('url') self._download_webpage(base, b_id, data='1:1', note='Closing socket', fatal=False) return { 'id': b_id, 'title': b_data.get('name'), 'formats': self._extract_m3u8_formats(m3u8_url, b_id, 'mp4', live=True), 'thumbnail': m3u8_url.replace('av.m3u8', 'thumb'), 'uploader': try_get(b_data, lambda x: x['broadcast_user']['name']), # same as title 'is_live': True, } ��������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/samplefocus.py���������������������������������������������������0000664�0000000�0000000�00000007741�14675634471�0021565�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re from .common import InfoExtractor from ..utils import ( extract_attributes, get_element_by_attribute, int_or_none, ) class SampleFocusIE(InfoExtractor): _VALID_URL = r'https?://(?:www\.)?samplefocus\.com/samples/(?P<id>[^/?&#]+)' _TESTS = [{ 'url': 'https://samplefocus.com/samples/lil-peep-sad-emo-guitar', 'md5': '48c8d62d60be467293912e0e619a5120', 'info_dict': { 'id': '40316', 'display_id': 'lil-peep-sad-emo-guitar', 'ext': 'mp3', 'title': 'Lil Peep Sad Emo Guitar', 'thumbnail': r're:^https?://.+\.png', 'license': 'Standard License', 'uploader': 'CapsCtrl', 'uploader_id': 'capsctrl', 'like_count': int, 'comment_count': int, 'categories': ['Samples', 'Guitar', 'Electric guitar'], }, }, { 'url': 'https://samplefocus.com/samples/dababy-style-bass-808', 'only_matching': True, }, { 'url': 'https://samplefocus.com/samples/young-chop-kick', 'only_matching': True, }] def _real_extract(self, url): display_id = self._match_id(url) webpage = self._download_webpage(url, display_id, impersonate=True) sample_id = self._search_regex( r'<input[^>]+id=(["\'])sample_id\1[^>]+value=(?:["\'])(?P<id>\d+)', webpage, 'sample id', group='id') title = self._og_search_title(webpage, fatal=False) or self._html_search_regex( r'<h1>(.+?)</h1>', webpage, 'title') mp3_url = self._search_regex( r'<input[^>]+id=(["\'])sample_mp3\1[^>]+value=(["\'])(?P<url>(?:(?!\2).)+)', webpage, 'mp3', fatal=False, group='url') or extract_attributes(self._search_regex( r'<meta[^>]+itemprop=(["\'])contentUrl\1[^>]*>', webpage, 'mp3 url', group=0))['content'] thumbnail = self._og_search_thumbnail(webpage) or self._html_search_regex( r'<img[^>]+class=(?:["\'])waveform responsive-img[^>]+src=(["\'])(?P<url>(?:(?!\1).)+)', webpage, 'mp3', fatal=False, group='url') comments = [] for author_id, author, body in re.findall(r'(?s)<p[^>]+class="comment-author"><a[^>]+href="/users/([^"]+)">([^"]+)</a>.+?<p[^>]+class="comment-body">([^>]+)</p>', webpage): comments.append({ 'author': author, 'author_id': author_id, 'text': body, }) uploader_id = uploader = None mobj = re.search(r'>By <a[^>]+href="/users/([^"]+)"[^>]*>([^<]+)', webpage) if mobj: uploader_id, uploader = mobj.groups() breadcrumb = get_element_by_attribute('typeof', 'BreadcrumbList', webpage) categories = [] if breadcrumb: for _, name in 
re.findall(r'<span[^>]+property=(["\'])name\1[^>]*>([^<]+)', breadcrumb): categories.append(name) def extract_count(klass): return int_or_none(self._html_search_regex( rf'<span[^>]+class=(?:["\'])?{klass}-count[^>]*>(\d+)', webpage, klass, fatal=False)) return { 'id': sample_id, 'title': title, 'formats': [{ 'url': mp3_url, 'ext': 'mp3', 'vcodec': 'none', 'acodec': 'mp3', 'http_headers': { 'Referer': url, }, }], 'display_id': display_id, 'thumbnail': thumbnail, 'uploader': uploader, 'license': self._html_search_regex( r'<a[^>]+href=(["\'])/license\1[^>]*>(?P<license>[^<]+)<', webpage, 'license', fatal=False, group='license'), 'uploader_id': uploader_id, 'like_count': extract_count(f'sample-{sample_id}-favorites'), 'comment_count': extract_count('comments'), 'comments': comments, 'categories': categories, } �������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/sapo.py����������������������������������������������������������0000664�0000000�0000000�00000010454�14675634471�0020201�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������import re from .common import InfoExtractor from ..utils import ( parse_duration, unified_strdate, ) class SapoIE(InfoExtractor): IE_DESC = 'SAPO Vídeos' _VALID_URL = r'https?://(?:(?:v2|www)\.)?videos\.sapo\.(?:pt|cv|ao|mz|tl)/(?P<id>[\da-zA-Z]{20})' _TESTS = [ { 'url': 'http://videos.sapo.pt/UBz95kOtiWYUMTA5Ghfi', 'md5': '79ee523f6ecb9233ac25075dee0eda83', 'note': 'SD video', 'info_dict': { 'id': 'UBz95kOtiWYUMTA5Ghfi', 'ext': 'mp4', 'title': 'Benfica - Marcas na Hitória', 'description': 'md5:c9082000a128c3fd57bf0299e1367f22', 'duration': 264, 'uploader': 'tiago_1988', 'upload_date': '20080229', 'categories': ['benfica', 'cabral', 'desporto', 'futebol', 'geovanni', 'hooijdonk', 'joao', 'karel', 'lisboa', 'miccoli'], }, }, { 'url': 'http://videos.sapo.pt/IyusNAZ791ZdoCY5H5IF', 'md5': '90a2f283cfb49193fe06e861613a72aa', 'note': 'HD video', 'info_dict': { 'id': 'IyusNAZ791ZdoCY5H5IF', 'ext': 'mp4', 'title': 'Codebits VII - Report', 'description': 'md5:6448d6fd81ce86feac05321f354dbdc8', 'duration': 144, 'uploader': 'codebits', 'upload_date': '20140427', 'categories': ['codebits', 'codebits2014'], }, }, { 'url': 'http://v2.videos.sapo.pt/yLqjzPtbTimsn2wWBKHz', 'md5': 'e5aa7cc0bdc6db9b33df1a48e49a15ac', 'note': 'v2 video', 'info_dict': { 'id': 'yLqjzPtbTimsn2wWBKHz', 'ext': 'mp4', 'title': 'Hipnose Condicionativa 4', 'description': 'md5:ef0481abf8fb4ae6f525088a6dadbc40', 'duration': 692, 'uploader': 'sapozen', 'upload_date': '20090609', 'categories': ['condicionativa', 'heloisa', 'hipnose', 'miranda', 'sapo', 'zen'], }, }, ] def _real_extract(self, url): mobj = self._match_valid_url(url) video_id = mobj.group('id') item = self._download_xml( f'http://rd3.videos.sapo.pt/{video_id}/rss2', video_id).find('./channel/item') title = item.find('./title').text description = item.find('./{http://videos.sapo.pt/mrss/}synopse').text thumbnail = item.find('./{http://search.yahoo.com/mrss/}content').get('url') duration = parse_duration(item.find('./{http://videos.sapo.pt/mrss/}time').text) uploader = item.find('./{http://videos.sapo.pt/mrss/}author').text upload_date = unified_strdate(item.find('./pubDate').text) view_count = 
int(item.find('./{http://videos.sapo.pt/mrss/}views').text) comment_count = int(item.find('./{http://videos.sapo.pt/mrss/}comment_count').text) tags = item.find('./{http://videos.sapo.pt/mrss/}tags').text categories = tags.split() if tags else [] age_limit = 18 if item.find('./{http://videos.sapo.pt/mrss/}m18').text == 'true' else 0 video_url = item.find('./{http://videos.sapo.pt/mrss/}videoFile').text video_size = item.find('./{http://videos.sapo.pt/mrss/}videoSize').text.split('x') formats = [{ 'url': video_url, 'ext': 'mp4', 'format_id': 'sd', 'width': int(video_size[0]), 'height': int(video_size[1]), }] if item.find('./{http://videos.sapo.pt/mrss/}HD').text == 'true': formats.append({ 'url': re.sub(r'/mov/1$', '/mov/39', video_url), 'ext': 'mp4', 'format_id': 'hd', 'width': 1280, 'height': 720, }) return { 'id': video_id, 'title': title, 'description': description, 'thumbnail': thumbnail, 'duration': duration, 'uploader': uploader, 'upload_date': upload_date, 'view_count': view_count, 'comment_count': comment_count, 'categories': categories, 'age_limit': age_limit, 'formats': formats, } ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/sbs.py�����������������������������������������������������������0000664�0000000�0000000�00000014624�14675634471�0020031�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..networking import HEADRequest from ..utils import ( float_or_none, int_or_none, parse_duration, parse_iso8601, traverse_obj, update_url_query, url_or_none, ) class SBSIE(InfoExtractor): IE_DESC = 'sbs.com.au' _VALID_URL = r'''(?x) https?://(?:www\.)?sbs\.com\.au/(?: ondemand(?: /video/(?:single/)?| /(?:movie|tv-program)/[^/]+/| /(?:tv|news)-series/(?:[^/]+/){3}| .*?\bplay=|/watch/ )|news/(?:embeds/)?video/ )(?P<id>[0-9]+)''' _EMBED_REGEX = [r'''(?x)] (?: <meta\s+property="og:video"\s+content=| <iframe[^>]+?src= ) (["\'])(?P<url>https?://(?:www\.)?sbs\.com\.au/ondemand/video/.+?)\1'''] _TESTS = [{ # Original URL is handled by the generic IE which finds the iframe: # http://www.sbs.com.au/thefeed/blog/2014/08/21/dingo-conservation 'url': 'http://www.sbs.com.au/ondemand/video/single/320403011771/?source=drupal&vertical=thefeed', 'md5': '31f84a7a19b53635db63c73f8ab0c4a7', 'info_dict': { 'id': '320403011771', # '_rFBPRPO4pMR', 'ext': 'mp4', 'title': 'Dingo Conservation (The Feed)', 'description': 'md5:f250a9856fca50d22dec0b5b8015f8a5', 'thumbnail': r're:https?://.*\.jpg', 'duration': 308, 'timestamp': 1408613220, 'upload_date': '20140821', 'uploader': 'SBSC', }, 'expected_warnings': ['Unable to download JSON metadata'], }, { 'url': 'http://www.sbs.com.au/ondemand/video/320403011771/Dingo-Conservation-The-Feed', 'only_matching': True, }, { 'url': 'http://www.sbs.com.au/news/video/471395907773/The-Feed-July-9', 'only_matching': True, }, { 'url': 'https://www.sbs.com.au/ondemand/?play=1836638787723', 'only_matching': True, }, { 'url': 'https://www.sbs.com.au/ondemand/program/inside-windsor-castle?play=1283505731842', 'only_matching': True, }, { 'url': 
'https://www.sbs.com.au/news/embeds/video/1840778819866', 'only_matching': True, }, { 'url': 'https://www.sbs.com.au/ondemand/watch/1698704451971', 'only_matching': True, }, { 'url': 'https://www.sbs.com.au/ondemand/movie/coherence/1469404227931', 'only_matching': True, }, { 'note': 'Live stream', 'url': 'https://www.sbs.com.au/ondemand/video/1726824003663/sbs-24x7-live-stream-nsw', 'only_matching': True, }, { 'url': 'https://www.sbs.com.au/ondemand/news-series/dateline/dateline-2022/dateline-s2022-ep26/2072245827515', 'only_matching': True, }, { 'url': 'https://www.sbs.com.au/ondemand/tv-series/the-handmaids-tale/season-5/the-handmaids-tale-s5-ep1/2065631811776', 'only_matching': True, }, { 'url': 'https://www.sbs.com.au/ondemand/tv-program/autun-romes-forgotten-sister/2116212803602', 'only_matching': True, }] _GEO_COUNTRIES = ['AU'] _AUS_TV_PARENTAL_GUIDELINES = { 'P': 0, 'C': 7, 'G': 0, 'PG': 0, 'M': 14, 'MA15+': 15, 'MAV15+': 15, 'R18+': 18, } _PLAYER_API = 'https://www.sbs.com.au/api/v3' def _real_extract(self, url): video_id = self._match_id(url) formats, subtitles = self._extract_smil_formats_and_subtitles( update_url_query(f'{self._PLAYER_API}/video_smil', {'id': video_id}), video_id) if not formats: urlh = self._request_webpage( HEADRequest('https://sbs-vod-prod-01.akamaized.net/'), video_id, note='Checking geo-restriction', fatal=False, expected_status=403) if urlh: error_reasons = urlh.headers.get_all('x-error-reason') or [] if 'geo-blocked' in error_reasons: self.raise_geo_restricted(countries=['AU']) self.raise_no_formats('No formats are available', video_id=video_id) media = traverse_obj(self._download_json( f'{self._PLAYER_API}/video_stream', video_id, fatal=False, query={'id': video_id, 'context': 'tv'}), ('video_object', {dict})) or {} media.update(self._download_json( f'https://catalogue.pr.sbsod.com/mpx-media/{video_id}', video_id, fatal=not media) or {}) # For named episodes, use the catalogue's title to set episode, rather than generic 'Episode N'. 
if traverse_obj(media, ('partOfSeries', {dict})): media['epName'] = traverse_obj(media, ('title', {str})) return { 'id': video_id, **traverse_obj(media, { 'title': ('name', {str}), 'description': ('description', {str}), 'channel': ('taxonomy', 'channel', 'name', {str}), 'series': ((('partOfSeries', 'name'), 'seriesTitle'), {str}), 'series_id': ((('partOfSeries', 'uuid'), 'seriesID'), {str}), 'season_number': ('seasonNumber', {int_or_none}), 'episode': ('epName', {str}), 'episode_number': ('episodeNumber', {int_or_none}), 'timestamp': (('datePublished', ('publication', 'startDate')), {parse_iso8601}), 'release_year': ('releaseYear', {int_or_none}), 'duration': ('duration', ({float_or_none}, {parse_duration})), 'is_live': ('liveStream', {bool}), 'age_limit': (('classificationID', 'contentRating'), {str.upper}, { lambda x: self._AUS_TV_PARENTAL_GUIDELINES.get(x)}), # dict.get is unhashable in py3.7 }, get_all=False), **traverse_obj(media, { 'categories': (('genres', ...), ('taxonomy', ('genre', 'subgenre'), 'name'), {str}), 'tags': (('consumerAdviceTexts', ('sbsSubCertification', 'consumerAdvice')), ..., {str}), 'thumbnails': ('thumbnails', lambda _, v: url_or_none(v['contentUrl']), { 'id': ('name', {str}), 'url': 'contentUrl', 'width': ('width', {int_or_none}), 'height': ('height', {int_or_none}), }), }), 'formats': formats, 'subtitles': subtitles, 'uploader': 'SBSC', } ������������������������������������������������������������������������������������������������������������yt-dlp-2024.09.27/yt_dlp/extractor/sbscokr.py�������������������������������������������������������0000664�0000000�0000000�00000020132�14675634471�0020677�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������from .common import InfoExtractor from ..utils import ( clean_html, int_or_none, parse_iso8601, parse_resolution, url_or_none, ) from ..utils.traversal import traverse_obj class SBSCoKrIE(InfoExtractor): IE_NAME = 'sbs.co.kr' _VALID_URL = [r'https?://allvod\.sbs\.co\.kr/allvod/vod(?:Package)?EndPage\.do\?(?:[^#]+&)?mdaId=(?P<id>\d+)', r'https?://programs\.sbs\.co\.kr/(?:enter|drama|culture|sports|plus|mtv|kth)/[a-z0-9]+/(?:vod|clip|movie)/\d+/(?P<id>(?:OC)?\d+)'] _TESTS = [{ 'url': 'https://programs.sbs.co.kr/enter/dongsang2/clip/52007/OC467706746?div=main_pop_clip', 'md5': 'c3f6d45e1fb5682039d94cda23c36f19', 'info_dict': { 'id': 'OC467706746', 'ext': 'mp4', 'title': '‘아슬아슬’ 박군♥한영의 새 집 인테리어 대첩♨', 'description': 'md5:6a71eb1979ee4a94ea380310068ccab4', 'thumbnail': 'https://img2.sbs.co.kr/ops_clip_img/2023/10/10/34c4c0f9-a9a5-4ff6-a92e-9bb4b5f6fa65915w1280.jpg', 'release_timestamp': 1696889400, 'release_date': '20231009', 'view_count': int, 'like_count': int, 'duration': 238, 'age_limit': 15, 'series': '동상이몽2_너는 내 운명', 'episode': '레이디제인, ‘혼전임신설’ ‘3개월’ 앞당긴 결혼식 비하인드 스토리 최초 공개!', 'episode_number': 311, }, }, { 'url': 'https://allvod.sbs.co.kr/allvod/vodPackageEndPage.do?mdaId=22000489324&combiId=PA000000284&packageType=A&isFreeYN=', 'md5': 'bf46b2e89fda7ae7de01f5743cef7236', 'info_dict': { 'id': '22000489324', 'ext': 'mp4', 'title': '[다시보기] 트롤리 15회', 'description': 'md5:0e55d74bef1ac55c61ae90c73ac485f4', 'thumbnail': 'https://img2.sbs.co.kr/img/sbs_cms/WE/2023/02/14/arC1676333794938-1280-720.jpg', 'release_timestamp': 
            'release_date': '20230213',
            'view_count': int,
            'like_count': int,
            'duration': 5931,
            'age_limit': 15,
            'series': '트롤리',
            'episode': '이거 다 거짓말이야',
            'episode_number': 15,
        },
    }, {
        'url': 'https://programs.sbs.co.kr/enter/fourman/vod/69625/22000508948',
        'md5': '41e8ae4cc6c8424f4e4d76661a4becbf',
        'info_dict': {
            'id': '22000508948',
            'ext': 'mp4',
            'title': '[다시보기] 신발 벗고 돌싱포맨 104회',
            'description': 'md5:c6a247383c4dd661e4b956bf4d3b586e',
            'thumbnail': 'https://img2.sbs.co.kr/img/sbs_cms/WE/2023/08/30/2vb1693355446261-1280-720.jpg',
            'release_timestamp': 1693342800,
            'release_date': '20230829',
            'view_count': int,
            'like_count': int,
            'duration': 7036,
            'age_limit': 15,
            'series': '신발 벗고 돌싱포맨',
            'episode': '돌싱포맨 저격수들 등장!',
            'episode_number': 104,
        },
    }]

    def _call_api(self, video_id, rscuse=''):
        return self._download_json(
            f'https://api.play.sbs.co.kr/1.0/sbs_vodall/{video_id}', video_id,
            note=f'Downloading m3u8 information {rscuse}',
            query={
                'platform': 'pcweb',
                'protocol': 'download',
                'absolute_show': 'Y',
                'service': 'program',
                'ssl': 'Y',
                'rscuse': rscuse,
            })

    def _real_extract(self, url):
        video_id = self._match_id(url)

        details = self._call_api(video_id)
        source = traverse_obj(details, ('vod', 'source', 'mediasource', {dict})) or {}

        formats = []
        for stream in traverse_obj(details, (
                'vod', 'source', 'mediasourcelist', lambda _, v: v['mediaurl'] or v['mediarscuse'],
        ), default=[source]):
            if not stream.get('mediaurl'):
                new_source = traverse_obj(
                    self._call_api(video_id, rscuse=stream['mediarscuse']),
                    ('vod', 'source', 'mediasource', {dict})) or {}
                if new_source.get('mediarscuse') == source.get('mediarscuse') or not new_source.get('mediaurl'):
                    continue
                stream = new_source
            formats.append({
                'url': stream['mediaurl'],
                'format_id': stream.get('mediarscuse'),
                'format_note': stream.get('medianame'),
                **parse_resolution(stream.get('quality')),
                'preference': int_or_none(stream.get('mediarscuse')),
            })

        caption_url = traverse_obj(details, ('vod', 'source', 'subtitle', {url_or_none}))

        return {
            'id': video_id,
            **traverse_obj(details, ('vod', {
                'title': ('info', 'title'),
                'duration': ('info', 'duration', {int_or_none}),
                'view_count': ('info', 'viewcount', {int_or_none}),
                'like_count': ('info', 'likecount', {int_or_none}),
                'description': ('info', 'synopsis', {clean_html}),
                'episode': ('info', 'content', ('contenttitle', 'title')),
                'episode_number': ('info', 'content', 'number', {int_or_none}),
                'series': ('info', 'program', 'programtitle'),
                'age_limit': ('info', 'targetage', {int_or_none}),
                'release_timestamp': ('info', 'broaddate', {parse_iso8601}),
                'thumbnail': ('source', 'thumbnail', 'origin', {url_or_none}),
            }), get_all=False),
            'formats': formats,
            'subtitles': {'ko': [{'url': caption_url}]} if caption_url else None,
        }


class SBSCoKrAllvodProgramIE(InfoExtractor):
    IE_NAME = 'sbs.co.kr:allvod_program'
    _VALID_URL = r'https?://allvod\.sbs\.co\.kr/allvod/vod(?:Free)?ProgramDetail\.do\?(?:[^#]+&)?pgmId=(?P<id>P?\d+)'

    _TESTS = [{
        'url': 'https://allvod.sbs.co.kr/allvod/vodFreeProgramDetail.do?type=legend&pgmId=22000010159&listOrder=vodCntAsc',
        'info_dict': {
            '_type': 'playlist',
            'id': '22000010159',
        },
        'playlist_count': 18,
    }, {
        'url': 'https://allvod.sbs.co.kr/allvod/vodProgramDetail.do?pgmId=P460810577',
        'info_dict': {
            '_type': 'playlist',
            'id': 'P460810577',
        },
        'playlist_count': 13,
    }]

    def _real_extract(self, url):
        program_id = self._match_id(url)

        details = self._download_json(
            'https://allvod.sbs.co.kr/allvod/vodProgramDetail/vodProgramDetailAjax.do',
            program_id, note='Downloading program details',
            query={
                'pgmId': program_id,
                'currentCount': '10000',
            })

        return self.playlist_result(
            [self.url_result(f'https://allvod.sbs.co.kr/allvod/vodEndPage.do?mdaId={video_id}', SBSCoKrIE)
             for video_id in traverse_obj(details, ('list', ..., 'mdaId'))], program_id)


class SBSCoKrProgramsVodIE(InfoExtractor):
    IE_NAME = 'sbs.co.kr:programs_vod'
    _VALID_URL = r'https?://programs\.sbs\.co\.kr/(?:enter|drama|culture|sports|plus|mtv)/(?P<id>[a-z0-9]+)/vods'

    _TESTS = [{
        'url': 'https://programs.sbs.co.kr/culture/morningwide/vods/65007',
        'info_dict': {
            '_type': 'playlist',
            'id': '00000210215',
        },
        'playlist_mincount': 9782,
    }, {
        'url': 'https://programs.sbs.co.kr/enter/dongsang2/vods/52006',
        'info_dict': {
            '_type': 'playlist',
            'id': '22000010476',
        },
        'playlist_mincount': 312,
    }]

    def _real_extract(self, url):
        program_slug = self._match_id(url)

        program_id = self._download_json(
            f'https://static.apis.sbs.co.kr/program-api/1.0/menu/{program_slug}', program_slug,
            note='Downloading program menu data')['program']['programid']

        return self.url_result(
            f'https://allvod.sbs.co.kr/allvod/vodProgramDetail.do?pgmId={program_id}', SBSCoKrAllvodProgramIE)

yt-dlp-2024.09.27/yt_dlp/extractor/screen9.py

from .common import InfoExtractor
from ..utils import traverse_obj


class Screen9IE(InfoExtractor):
    _VALID_URL = r'https?://(?:\w+\.screen9\.(?:tv|com)|play\.su\.se)/(?:embed|media)/(?P<id>[^?#/]+)'
    _TESTS = [
        {
            'url': 'https://api.screen9.com/embed/8kTNEjvoXGM33dmWwF0uDA',
            'md5': 'd60d23f8980583b930724b01fa6ddb41',
            'info_dict': {
                'id': '8kTNEjvoXGM33dmWwF0uDA',
                'ext': 'mp4',
                'title': 'Östersjön i förändrat klimat',
                'thumbnail': r're:^https?://.+\.jpg',
            },
        },
        {
            'url': 'https://folkhogskolekanalen.screen9.tv/media/gy35PKLHe-5K29RYHga2bw/ett-starkare-samhalle-en-snabbguide-om-sveriges-folkhogskolor',
            'md5': 'c9389806e78573ea34fc48b6f94465dc',
            'info_dict': {
                'id': 'gy35PKLHe-5K29RYHga2bw',
                'ext': 'mp4',
                'title': 'Ett starkare samhälle - en snabbguide om Sveriges folkhögskolor',
                'thumbnail': r're:^https?://.+\.jpg',
            },
        },
        {
            'url': 'https://play.su.se/media/H1YA0EYNCxiesrSU1kaRBQ/baltic-breakfast',
            'md5': '2b817647c3058002526269deff4c0683',
            'info_dict': {
                'id': 'H1YA0EYNCxiesrSU1kaRBQ',
                'ext': 'mp4',
                'title': 'Baltic Breakfast',
                'thumbnail': r're:^https?://.+\.jpg',
            },
        },
    ]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(f'https://api.screen9.com/embed/{video_id}', video_id)
        config = self._search_json(r'var\s+config\s*=', webpage, 'config', video_id)

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            traverse_obj(config, ('src', lambda _, v: v['type'] == 'application/x-mpegURL', 'src'), get_all=False),
            video_id, ext='mp4')
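        # The player config also lists a progressive MP4 source; append it alongside the HLS formats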
        formats.append({
            'url': traverse_obj(config, ('src', lambda _, v: v['type'] == 'video/mp4', 'src'), get_all=False),
            'format': 'mp4',
        })

        return {
            'id': video_id,
            'title': traverse_obj(
                config,
                ('plugins', (('title', 'title'), ('googleAnalytics', 'title'), ('share', 'mediaTitle'))),
                get_all=False),
            'description': traverse_obj(config, ('plugins', 'title', 'description')),
            'thumbnail': traverse_obj(config, ('poster')),
            'formats': formats,
            'subtitles': subtitles,
        }

yt-dlp-2024.09.27/yt_dlp/extractor/screencast.py

import urllib.parse

from .common import InfoExtractor
from ..utils import ExtractorError


class ScreencastIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?screencast\.com/t/(?P<id>[a-zA-Z0-9]+)'
    _TESTS = [{
        'url': 'http://www.screencast.com/t/3ZEjQXlT',
        'md5': '917df1c13798a3e96211dd1561fded83',
        'info_dict': {
            'id': '3ZEjQXlT',
            'ext': 'm4v',
            'title': 'Color Measurement with Ocean Optics Spectrometers',
            'description': 'md5:240369cde69d8bed61349a199c5fb153',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        },
    }, {
        'url': 'http://www.screencast.com/t/V2uXehPJa1ZI',
        'md5': 'e8e4b375a7660a9e7e35c33973410d34',
        'info_dict': {
            'id': 'V2uXehPJa1ZI',
            'ext': 'mov',
            'title': 'The Amadeus Spectrometer',
            'description': 're:^In this video, our friends at.*To learn more about Amadeus, visit',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        },
    }, {
        'url': 'http://www.screencast.com/t/aAB3iowa',
        'md5': 'dedb2734ed00c9755761ccaee88527cd',
        'info_dict': {
            'id': 'aAB3iowa',
            'ext': 'mp4',
            'title': 'Google Earth Export',
            'description': 'Provides a demo of a CommunityViz export to Google Earth, one of the 3D viewing options.',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        },
    }, {
        'url': 'http://www.screencast.com/t/X3ddTrYh',
        'md5': '669ee55ff9c51988b4ebc0877cc8b159',
        'info_dict': {
            'id': 'X3ddTrYh',
            'ext': 'wmv',
            'title': 'Toolkit 6 User Group Webinar (2014-03-04) - Default Judgment and First Impression',
            'description': 'md5:7b9f393bc92af02326a5c5889639eab0',
            'thumbnail': r're:^https?://.*\.(?:gif|jpg)$',
        },
    }, {
        'url': 'http://screencast.com/t/aAB3iowa',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        video_url = self._html_search_regex(
            r'<embed name="Video".*?src="([^"]+)"', webpage,
            'QuickTime embed', default=None)

        if video_url is None:
            flash_vars_s = self._html_search_regex(
                r'<param name="flashVars" value="([^"]+)"', webpage, 'flash vars',
                default=None)
            if not flash_vars_s:
                flash_vars_s = self._html_search_regex(
                    r'<param name="initParams" value="([^"]+)"', webpage, 'flash vars',
                    default=None)
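                # initParams packs its values comma-separated; rewrite to '&' so parse_qs can split them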
                if flash_vars_s:
                    flash_vars_s = flash_vars_s.replace(',', '&')
            if flash_vars_s:
                flash_vars = urllib.parse.parse_qs(flash_vars_s)
                video_url_raw = urllib.parse.quote(flash_vars['content'][0])
                video_url = video_url_raw.replace('http%3A', 'http:')

        if video_url is None:
            video_meta = self._html_search_meta(
                'og:video', webpage, default=None)
            if video_meta:
                video_url = self._search_regex(
                    r'src=(.*?)(?:$|&)', video_meta,
                    'meta tag video URL', default=None)

        if video_url is None:
            video_url = self._html_search_regex(
                r'MediaContentUrl["\']\s*:(["\'])(?P<url>(?:(?!\1).)+)\1',
                webpage, 'video url', default=None, group='url')

        if video_url is None:
            video_url = self._html_search_meta(
                'og:video', webpage, default=None)

        if video_url is None:
            raise ExtractorError('Cannot find video')

        title = self._og_search_title(webpage, default=None)
        if title is None:
            title = self._html_search_regex(
                [r'<b>Title:</b> ([^<]+)</div>',
                 r'class="tabSeperator">></span><span class="tabText">(.+?)<',
                 r'<title>([^<]+)</title>'],
                webpage, 'title')
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._og_search_description(webpage, default=None)
        if description is None:
            description = self._html_search_meta('description', webpage)

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
        }

yt-dlp-2024.09.27/yt_dlp/extractor/screencastify.py

import urllib.parse

from .common import InfoExtractor
from ..utils import traverse_obj, update_url_query


class ScreencastifyIE(InfoExtractor):
    _VALID_URL = [
        r'https?://watch\.screencastify\.com/v/(?P<id>[^/?#]+)',
        r'https?://app\.screencastify\.com/v[23]/watch/(?P<id>[^/?#]+)',
    ]
    _TESTS = [{
        'url': 'https://watch.screencastify.com/v/sYVkZip3quLKhHw4Ybk8',
        'info_dict': {
            'id': 'sYVkZip3quLKhHw4Ybk8',
            'ext': 'mp4',
            'title': 'Inserting and Aligning the Case Top and Bottom',
            'description': '',
            'uploader': 'Paul Gunn',
            'extra_param_to_segment_url': str,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://app.screencastify.com/v3/watch/J5N7H11wofDN1jZUCr3t',
        'info_dict': {
            'id': 'J5N7H11wofDN1jZUCr3t',
            'ext': 'mp4',
            'uploader': 'Scott Piesen',
            'description': '',
            'title': 'Lesson Recording 1-17 Burrr...',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://app.screencastify.com/v2/watch/BQ26VbUdfbQLhKzkktOk',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        info = self._download_json(
            f'https://umbrella.svc.screencastify.com/api/umbrellaService/watch/{video_id}', video_id)

        query_string = traverse_obj(info, ('manifest', 'auth', 'query'))
        query = urllib.parse.parse_qs(query_string)
        formats = []
        dash_manifest_url = traverse_obj(info, ('manifest', 'url'))
        if dash_manifest_url:
            formats.extend(
                self._extract_mpd_formats(
                    dash_manifest_url, video_id, mpd_id='dash', query=query, fatal=False))
        hls_manifest_url = traverse_obj(info, ('manifest', 'hlsUrl'))
        if hls_manifest_url:
            formats.extend(
                self._extract_m3u8_formats(
                    hls_manifest_url, video_id, ext='mp4', m3u8_id='hls', query=query, fatal=False))
        for f in formats:
            f['url'] = update_url_query(f['url'], query)

        return {
            'id': video_id,
            'title': info.get('title'),
            'description': info.get('description'),
            'uploader': info.get('userName'),
            'formats': formats,
            'extra_param_to_segment_url': query_string,
        }

yt-dlp-2024.09.27/yt_dlp/extractor/screencastomatic.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    get_element_by_class,
    int_or_none,
    remove_start,
    strip_or_none,
    unified_strdate,
    urlencode_postdata,
)


class ScreencastOMaticIE(InfoExtractor):
    _VALID_URL = r'https?://screencast-o-matic\.com/(?:(?:watch|player)/|embed\?.*?\bsc=)(?P<id>[0-9a-zA-Z]+)'
    _TESTS = [{
        'url': 'http://screencast-o-matic.com/watch/c2lD3BeOPl',
        'md5': '483583cb80d92588f15ccbedd90f0c18',
        'info_dict': {
            'id': 'c2lD3BeOPl',
            'ext': 'mp4',
            'title': 'Welcome to 3-4 Philosophy @ DECV!',
            'thumbnail': r're:^https?://.*\.jpg$',
            'description': 'as the title says! also: some general info re 1) VCE philosophy and 2) distance learning.',
            'duration': 369,
            'upload_date': '20141216',
        },
    }, {
        'url': 'http://screencast-o-matic.com/player/c2lD3BeOPl',
        'only_matching': True,
    }, {
        'url': 'http://screencast-o-matic.com/embed?ff=true&sc=cbV2r4Q5TL&fromPH=true&a=1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(
            'https://screencast-o-matic.com/player/' + video_id, video_id)

        if (self._html_extract_title(webpage) == 'Protected Content'
                or 'This video is private and requires a password' in webpage):
            password = self.get_param('videopassword')

            if not password:
                raise ExtractorError('Password protected video, use --video-password <password>', expected=True)

            form = self._search_regex(
                r'(?is)<form[^>]*>(?P<form>.+?)</form>', webpage, 'login form', group='form')
            form_data = self._hidden_inputs(form)
            form_data.update({
                'scPassword': password,
            })

            webpage = self._download_webpage(
                'https://screencast-o-matic.com/player/password', video_id, 'Logging in',
                data=urlencode_postdata(form_data))

            if 'Invalid password' in webpage:
                raise ExtractorError('Unable to login: Invalid password', expected=True)

        info = self._parse_html5_media_entries(url, webpage, video_id)[0]
        info.update({
            'id': video_id,
            'title': get_element_by_class('overlayTitle', webpage),
            'description': strip_or_none(get_element_by_class('overlayDescription', webpage)) or None,
            'duration': int_or_none(self._search_regex(
                r'player\.duration\s*=\s*function\(\)\s*{\s*return\s+(\d+);\s*};',
                webpage, 'duration', default=None)),
            'upload_date': unified_strdate(remove_start(
                get_element_by_class('overlayPublished', webpage), 'Published: ')),
        })
        return info

yt-dlp-2024.09.27/yt_dlp/extractor/screenrec.py

from .common import InfoExtractor


class ScreenRecIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?screenrec\.com/share/(?P<id>\w{10})'
    _TESTS = [{
        'url': 'https://screenrec.com/share/DasLtbknYo',
        'info_dict': {
            'id': 'DasLtbknYo',
            'ext': 'mp4',
            'title': '02.05.2024_03.01.25_REC',
            'description': 'Recorded with ScreenRec',
            'thumbnail': r're:^https?://.*\.gif$',
        },
        'params': {
            'skip_download': True,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        m3u8_url = self._search_regex(
            r'customUrl\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage, 'm3u8 URL', group='url')

        return {
            'id': video_id,
            'title': self._og_search_title(webpage, default=None) or self._html_extract_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4'),
        }

yt-dlp-2024.09.27/yt_dlp/extractor/scrippsnetworks.py

import hashlib
import json

from .anvato import AnvatoIE
from .aws import AWSIE
from .common import InfoExtractor
from ..utils import (
    smuggle_url,
    urlencode_postdata,
    xpath_text,
)


class ScrippsNetworksWatchIE(AWSIE):
    IE_NAME = 'scrippsnetworks:watch'
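    # Matches the legacy watch.geniuskitchen.com player pages, show pages and bare player URLs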
    _VALID_URL = r'''(?x)
                    https?://
                        watch\.
                        (?P<site>geniuskitchen)\.com/
                        (?:
                            player\.[A-Z0-9]+\.html\#|
                            show/(?:[^/]+/){2}|
                            player/
                        )
                        (?P<id>\d+)
                    '''
    _TESTS = [{
        'url': 'http://watch.geniuskitchen.com/player/3787617/Ample-Hills-Ice-Cream-Bike/',
        'info_dict': {
            'id': '4194875',
            'ext': 'mp4',
            'title': 'Ample Hills Ice Cream Bike',
            'description': 'Courtney Rada churns up a signature GK Now ice cream with The Scoopmaster.',
            'uploader': 'ANV',
            'upload_date': '20171011',
            'timestamp': 1507698000,
        },
        'params': {
            'skip_download': True,
        },
        'add_ie': [AnvatoIE.ie_key()],
        'skip': '404 Not Found',
    }]

    _SNI_TABLE = {
        'geniuskitchen': 'genius',
    }

    _AWS_API_KEY = 'E7wSQmq0qK6xPrF13WmzKiHo4BQ7tip4pQcSXVl1'
    _AWS_PROXY_HOST = 'web.api.video.snidigital.com'

    _AWS_USER_AGENT = 'aws-sdk-js/2.80.0 callback'

    def _real_extract(self, url):
        mobj = self._match_valid_url(url)
        site_id, video_id = mobj.group('site', 'id')

        aws_identity_id_json = json.dumps({
            'IdentityId': f'{self._AWS_REGION}:7655847c-0ae7-4d9b-80d6-56c062927eb3',
        }).encode()

        token = self._download_json(
            f'https://cognito-identity.{self._AWS_REGION}.amazonaws.com/', video_id,
            data=aws_identity_id_json,
            headers={
                'Accept': '*/*',
                'Content-Type': 'application/x-amz-json-1.1',
                'Referer': url,
                'X-Amz-Content-Sha256': hashlib.sha256(aws_identity_id_json).hexdigest(),
                'X-Amz-Target': 'AWSCognitoIdentityService.GetOpenIdToken',
                'X-Amz-User-Agent': self._AWS_USER_AGENT,
            })['Token']

        sts = self._download_xml(
            'https://sts.amazonaws.com/', video_id, data=urlencode_postdata({
                'Action': 'AssumeRoleWithWebIdentity',
                'RoleArn': 'arn:aws:iam::710330595350:role/Cognito_WebAPIUnauth_Role',
                'RoleSessionName': 'web-identity',
                'Version': '2011-06-15',
                'WebIdentityToken': token,
            }), headers={
                'Referer': url,
                'X-Amz-User-Agent': self._AWS_USER_AGENT,
                'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8',
            })

        def get(key):
            return xpath_text(
                sts, f'.//{{https://sts.amazonaws.com/doc/2011-06-15/}}{key}',
                fatal=True)

        mcp_id = self._aws_execute_api({
            'uri': f'/1/web/brands/{self._SNI_TABLE[site_id]}/episodes/scrid/{video_id}',
            'access_key': get('AccessKeyId'),
            'secret_key': get('SecretAccessKey'),
            'session_token': get('SessionToken'),
        }, video_id)['results'][0]['mcpId']

        return self.url_result(
            smuggle_url(
                f'anvato:anvato_scripps_app_web_prod_0837996dbe373629133857ae9eb72e740424d80a:{mcp_id}',
                {'geo_countries': ['US']}),
            AnvatoIE.ie_key(), video_id=mcp_id)


class ScrippsNetworksIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?(?P<site>cookingchanneltv|discovery|(?:diy|food)network|hgtv|travelchannel)\.com/videos/[0-9a-z-]+-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.cookingchanneltv.com/videos/the-best-of-the-best-0260338',
        'info_dict': {
            'id': '0260338',
            'ext': 'mp4',
            'title': 'The Best of the Best',
            'description': 'Catch a new episode of MasterChef Canada Tuedsay at 9/8c.',
            'timestamp': 1475678834,
            'upload_date': '20161005',
            'uploader': 'SCNI-SCND',
            'tags': 'count:10',
            'creator': 'Cooking Channel',
            'duration': 29.995,
            'chapters': [{'start_time': 0.0, 'end_time': 29.995, 'title': '<Untitled Chapter 1>'}],
            'thumbnail': 'https://images.dds.discovery.com/up/tp/Scripps_-_Food_Category_Prod/122/987/0260338_630x355.jpg',
        },
        'add_ie': ['ThePlatform'],
        'expected_warnings': ['No HLS formats found'],
    }, {
        'url': 'https://www.diynetwork.com/videos/diy-barnwood-tablet-stand-0265790',
        'only_matching': True,
    }, {
        'url': 'https://www.foodnetwork.com/videos/chocolate-strawberry-cake-roll-7524591',
        'only_matching': True,
    }, {
        'url': 'https://www.hgtv.com/videos/cookie-decorating-101-0301929',
        'only_matching': True,
    }, {
        'url': 'https://www.travelchannel.com/videos/two-climates-one-bag-5302184',
        'only_matching': True,
    }, {
        'url': 'https://www.discovery.com/videos/guardians-of-the-glades-cooking-with-tom-cobb-5578368',
        'only_matching': True,
    }]
    _ACCOUNT_MAP = {
        'cookingchanneltv': 2433005105,
        'discovery': 2706091867,
        'diynetwork': 2433004575,
        'foodnetwork': 2433005105,
        'hgtv': 2433004575,
        'travelchannel': 2433005739,
    }
    _TP_TEMPL = 'https://link.theplatform.com/s/ip77QC/media/guid/%d/%s?mbr=true'

    def _real_extract(self, url):
        site, guid = self._match_valid_url(url).groups()
        return self.url_result(smuggle_url(
            self._TP_TEMPL % (self._ACCOUNT_MAP[site], guid),
            {'force_smil_url': True}), 'ThePlatform', guid)

yt-dlp-2024.09.27/yt_dlp/extractor/scrolller.py

import json

from .common import InfoExtractor
from ..utils import determine_ext, int_or_none


class ScrolllerIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?scrolller\.com/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://scrolller.com/a-helping-hand-1k9pxikxkw',
        'info_dict': {
            'id': 'a-helping-hand-1k9pxikxkw',
            'ext': 'mp4',
            'thumbnail': 'https://zepto.scrolller.com/a-helping-hand-3ty9q8x094-540x960.jpg',
            'title': 'A helping hand',
            'age_limit': 0,
        },
    }, {
        'url': 'https://scrolller.com/tigers-chasing-a-drone-c5d1f2so6j',
        'info_dict': {
            'id': 'tigers-chasing-a-drone-c5d1f2so6j',
            'ext': 'mp4',
            'thumbnail': 'https://zepto.scrolller.com/tigers-chasing-a-drone-az9pkpguwe-540x303.jpg',
            'title': 'Tigers chasing a drone',
            'age_limit': 0,
        },
    }, {
        'url': 'https://scrolller.com/baby-rhino-smells-something-9chhugsv9p',
        'info_dict': {
            'id': 'baby-rhino-smells-something-9chhugsv9p',
            'ext': 'mp4',
            'thumbnail': 'https://atto.scrolller.com/hmm-whats-that-smell-bh54mf2c52-300x224.jpg',
            'title': 'Baby rhino smells something',
            'age_limit': 0,
        },
    }, {
        'url': 'https://scrolller.com/its-all-fun-and-games-cco8jjmoh7',
        'info_dict': {
            'id': 'its-all-fun-and-games-cco8jjmoh7',
            'ext': 'mp4',
            'thumbnail': 'https://atto.scrolller.com/its-all-fun-and-games-3amk9vg7m3-540x649.jpg',
            'title': 'It\'s all fun and games...',
            'age_limit': 0,
        },
    }, {
        'url': 'https://scrolller.com/may-the-force-be-with-you-octokuro-yeytg1fs7a',
        'info_dict': {
            'id': 'may-the-force-be-with-you-octokuro-yeytg1fs7a',
            'ext': 'mp4',
            'thumbnail': 'https://thumbs2.redgifs.com/DarkStarchyNautilus-poster.jpg',
            'title': 'May the force be with you (Octokuro)',
            'age_limit': 18,
        },
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        query = {
            'query': '''{
                getSubredditPost(url:"/%s"){
                    id
                    title
                    isNsfw
                    mediaSources{
                        url
                        width
                        height
                    }
                }
            }''' % video_id,  # noqa: UP031
        }

        video_data = self._download_json(
            'https://api.scrolller.com/api/v2/graphql', video_id,
            data=json.dumps(query).encode(),
            headers={'Content-Type': 'application/json'})['data']['getSubredditPost']

        formats, thumbnails = [], []
        for source in video_data['mediaSources']:
            if determine_ext(source.get('url')) in ('jpg', 'png'):
                thumbnails.append({
                    'url': source['url'],
                    'width': int_or_none(source.get('width')),
                    'height': int_or_none(source.get('height')),
                })
            elif source.get('url'):
                formats.append({
                    'url': source['url'],
                    'width': int_or_none(source.get('width')),
                    'height': int_or_none(source.get('height')),
                })

        if not formats:
            self.raise_no_formats('There is no video.', expected=True, video_id=video_id)

        return {
            'id': video_id,
            'title': video_data.get('title'),
            'thumbnails': thumbnails,
            'formats': formats,
            'age_limit': 18 if video_data.get('isNsfw') else 0,
        }
yt-dlp-2024.09.27/yt_dlp/extractor/scte.py

import re

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    decode_packed_codes,
    urlencode_postdata,
)


class SCTEBaseIE(InfoExtractor):
    _LOGIN_URL = 'https://www.scte.org/SCTE/Sign_In.aspx'
    _NETRC_MACHINE = 'scte'

    def _perform_login(self, username, password):
        login_popup = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login popup')

        def is_logged(webpage):
            return any(re.search(p, webpage) for p in (
                r'class=["\']welcome\b', r'>Sign Out<'))

        # already logged in
        if is_logged(login_popup):
            return

        login_form = self._hidden_inputs(login_popup)

        login_form.update({
            'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInUserName': username,
            'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$signInPassword': password,
            'ctl01$TemplateBody$WebPartManager1$gwpciNewContactSignInCommon$ciNewContactSignInCommon$RememberMe': 'on',
        })

        response = self._download_webpage(
            self._LOGIN_URL, None, 'Logging in',
            data=urlencode_postdata(login_form))

        if '|pageRedirect|' not in response and not is_logged(response):
            error = self._html_search_regex(
                r'(?s)<[^>]+class=["\']AsiError["\'][^>]*>(.+?)</div>',
                response, 'error message', default=None)
            if error:
                raise ExtractorError(f'Unable to login: {error}', expected=True)
            raise ExtractorError('Unable to log in')


class SCTEIE(SCTEBaseIE):
    _WORKING = False
    _VALID_URL = r'https?://learning\.scte\.org/mod/scorm/view\.php?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://learning.scte.org/mod/scorm/view.php?id=31484',
        'info_dict': {
            'title': 'Introduction to DOCSIS Engineering Professional',
            'id': '31484',
        },
        'playlist_count': 5,
        'skip': 'Requires account credentials',
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)

        title = self._search_regex(r'<h1>(.+?)</h1>', webpage, 'title')

        context_id = self._search_regex(r'context-(\d+)', webpage, video_id)
        content_base = f'https://learning.scte.org/pluginfile.php/{context_id}/mod_scorm/content/8/'
        context = decode_packed_codes(self._download_webpage(
            f'{content_base}mobile/data.js', video_id))

        data = self._parse_xml(
            self._search_regex(
                r'CreateData\(\s*"(.+?)"', context, 'data').replace(r"\'", "'"),
            video_id)

        entries = []
        for asset in data.findall('.//asset'):
            asset_url = asset.get('url')
            if not asset_url or not asset_url.endswith('.mp4'):
                continue
            asset_id = self._search_regex(
                r'video_([^_]+)_', asset_url, 'asset id', default=None)
            if not asset_id:
                continue
            entries.append({
                'id': asset_id,
                'title': title,
                'url': content_base + asset_url,
            })

        return self.playlist_result(entries, video_id, title)


class SCTECourseIE(SCTEBaseIE):
    _WORKING = False
    _VALID_URL = r'https?://learning\.scte\.org/(?:mod/sub)?course/view\.php?.*?\bid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://learning.scte.org/mod/subcourse/view.php?id=31491',
        'only_matching': True,
    }, {
        'url': 'https://learning.scte.org/course/view.php?id=3639',
        'only_matching': True,
    }, {
        'url': 'https://learning.scte.org/course/view.php?id=3073',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        course_id = self._match_id(url)

        webpage = self._download_webpage(url, course_id)

        title = self._search_regex(
            r'<h1>(.+?)</h1>', webpage, 'title', default=None)

        entries = []
        for mobj in re.finditer(
                r'''(?x)
                    <a[^>]+
                        href=(["\'])
                        (?P<url>
                            https?://learning\.scte\.org/mod/
                            (?P<kind>scorm|subcourse)/view\.php?(?:(?!\1).)*?
                            \bid=\d+
                        )
                    ''',
                webpage):
            item_url = mobj.group('url')
            if item_url == url:
                continue
            ie = (SCTEIE.ie_key() if mobj.group('kind') == 'scorm'
                  else SCTECourseIE.ie_key())
            entries.append(self.url_result(item_url, ie=ie))

        return self.playlist_result(entries, course_id, title)

yt-dlp-2024.09.27/yt_dlp/extractor/sejmpl.py

import datetime as dt

from .common import InfoExtractor
from .redge import RedCDNLivxIE
from ..utils import (
    clean_html,
    join_nonempty,
    js_to_json,
    strip_or_none,
    update_url_query,
)
from ..utils.traversal import traverse_obj


def is_dst(date):
    last_march = dt.datetime(date.year, 3, 31)
    last_october = dt.datetime(date.year, 10, 31)
    last_sunday_march = last_march - dt.timedelta(days=last_march.isoweekday() % 7)
    last_sunday_october = last_october - dt.timedelta(days=last_october.isoweekday() % 7)
    return last_sunday_march.replace(hour=2) <= date <= last_sunday_october.replace(hour=3)


def rfc3339_to_atende(date):
    date = dt.datetime.fromisoformat(date)
    date = date + dt.timedelta(hours=1 if is_dst(date) else 0)
    return int((date.timestamp() - 978307200) * 1000)


class SejmIE(InfoExtractor):
    _VALID_URL = (
        r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp(?:\?[^#]*)?#(?P<id>[\dA-F]+)',
        r'https?://(?:www\.)?sejm\.gov\.pl/[Ss]ejm(?P<term>\d+)\.nsf/transmisje(?:_arch)?\.xsp\?(?:[^#]+&)?unid=(?P<id>[\dA-F]+)',
        r'https?://sejm-embed\.redcdn\.pl/[Ss]ejm(?P<term>\d+)\.nsf/VideoFrame\.xsp/(?P<id>[\dA-F]+)',
    )
    IE_NAME = 'sejm'

    _TESTS = [{
        # multiple cameras, Polish sign language interpreter
        'url': 'https://www.sejm.gov.pl/Sejm10.nsf/transmisje_arch.xsp#6181EF1AD9CEEBB5C1258A6D006452B5',
        'info_dict': {
            'id': '6181EF1AD9CEEBB5C1258A6D006452B5',
            'title': '1. posiedzenie Sejmu X kadencji',
            'duration': 20145,
            'live_status': 'was_live',
            'location': 'Sala Posiedzeń',
        },
        'playlist': [{
            'info_dict': {
                'id': 'ENC01-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC01',
                'live_status': 'was_live',
            },
        }, {
            'info_dict': {
                'id': 'ENC30-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC30',
                'live_status': 'was_live',
            },
        }, {
            'info_dict': {
                'id': 'ENC31-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC31',
                'live_status': 'was_live',
            },
        }, {
            'info_dict': {
                'id': 'ENC32-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - ENC32',
                'live_status': 'was_live',
            },
        }, {
            # sign lang interpreter
            'info_dict': {
                'id': 'Migacz-ENC01-1-722340000000-722360145000',
                'ext': 'mp4',
                'duration': 20145,
                'title': '1. posiedzenie Sejmu X kadencji - Migacz-ENC01',
                'live_status': 'was_live',
            },
        }],
    }, {
        'url': 'https://www.sejm.gov.pl/Sejm8.nsf/transmisje.xsp?unid=9377A9D65518E9A5C125808E002E9FF2',
        'info_dict': {
            'id': '9377A9D65518E9A5C125808E002E9FF2',
            'title': 'Debata "Lepsza Polska: obywatelska"',
            'description': 'KP .Nowoczesna',
            'duration': 8770,
            'live_status': 'was_live',
            'location': 'sala kolumnowa im. Kazimierza Pużaka (bud. C-D)',
        },
        'playlist': [{
            'info_dict': {
                'id': 'ENC08-1-503831270000-503840040000',
                'ext': 'mp4',
                'duration': 8770,
                'title': 'Debata "Lepsza Polska: obywatelska" - ENC08',
                'live_status': 'was_live',
            },
        }],
    }, {
        # 7th term is very special, since it does not use redcdn livx
        'url': 'https://www.sejm.gov.pl/sejm7.nsf/transmisje_arch.xsp?rok=2015&month=11#A6E6D475ECCC6FE5C1257EF90034817F',
        'info_dict': {
            'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
            'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
            'description': 'SLD - Biuro Prasowe Klubu',
            'duration': 514,
            'location': 'sala 101/bud. C',
            'live_status': 'was_live',
        },
        'playlist': [{
            'info_dict': {
                'id': 'A6E6D475ECCC6FE5C1257EF90034817F',
                'ext': 'mp4',
                'title': 'Konferencja prasowa - Stanowisko SLD ws. składu nowego rządu',
                'duration': 514,
            },
        }],
    }, {
        'url': 'https://sejm-embed.redcdn.pl/Sejm10.nsf/VideoFrame.xsp/FED58EABB97FBD53C1258A7400386492',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        term, video_id = self._match_valid_url(url).group('term', 'id')
        frame = self._download_webpage(
            f'https://sejm-embed.redcdn.pl/Sejm{term}.nsf/VideoFrame.xsp/{video_id}',
            video_id)
        # despite being named "transmisje_arch", this endpoint works for live streams too!
        data = self._download_json(
            f'https://www.sejm.gov.pl/Sejm{term}.nsf/transmisje_arch.xsp/json/{video_id}',
            video_id)
        params = data['params']

        title = strip_or_none(data.get('title'))

        if data.get('status') == 'VIDEO_ENDED':
            live_status = 'was_live'
        elif data.get('status') == 'VIDEO_PLAYING':
            live_status = 'is_live'
        else:
            live_status = None
            self.report_warning(f'unknown status: {data.get("status")}')

        start_time = rfc3339_to_atende(params['start'])
        # current streams have a stop time of *expected* end of session, but actual times
        # can change during the transmission. setting a stop_time would artificially
        # end the stream at that time, while the session actually keeps going.
        if live_status == 'was_live':
            stop_time = rfc3339_to_atende(params['stop'])
            duration = (stop_time - start_time) // 1000
        else:
            stop_time, duration = None, None

        entries = []

        def add_entry(file, legacy_file=False):
            if not file:
                return
            file = self._proto_relative_url(file)
            if not legacy_file:
                file = update_url_query(file, {'startTime': start_time})
                if stop_time is not None:
                    file = update_url_query(file, {'stopTime': stop_time})
                stream_id = self._search_regex(r'/o2/sejm/([^/]+)/[^./]+\.livx', file, 'stream id')
            common_info = {
                'url': file,
                'duration': duration,
            }
            if legacy_file:
                entries.append({
                    **common_info,
                    'id': video_id,
                    'title': title,
                })
            else:
                entries.append({
                    **common_info,
                    '_type': 'url_transparent',
                    'ie_key': RedCDNLivxIE.ie_key(),
                    'id': stream_id,
                    'title': join_nonempty(title, stream_id, delim=' - '),
                })

        cameras = self._search_json(
            r'var\s+cameras\s*=', frame, 'camera list', video_id,
            contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json,
            fatal=False) or []
        for camera_file in traverse_obj(cameras, (..., 'file', {dict})):
            if camera_file.get('flv'):
                add_entry(camera_file['flv'])
            elif camera_file.get('mp4'):
                # MP4 files are only a thing in the 7th term: no streams before it, and from the 8th on it's redcdn livx
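                # legacy_file=True keeps the direct MP4 URL instead of routing it through the RedCDN livx extractor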
                add_entry(camera_file['mp4'], legacy_file=True)
            else:
                self.report_warning('Unknown camera stream type found')

        if params.get('mig'):
            add_entry(self._search_regex(r"var sliUrl\s*=\s*'([^']+)'", frame, 'sign language interpreter url', fatal=False))

        return {
            '_type': 'playlist',
            'entries': entries,
            'id': video_id,
            'title': title,
            'description': clean_html(data.get('desc')) or None,
            'duration': duration,
            'live_status': live_status,
            'location': strip_or_none(data.get('location')),
        }

yt-dlp-2024.09.27/yt_dlp/extractor/sen.py

from .common import InfoExtractor
from ..utils import url_or_none
from ..utils.traversal import traverse_obj


class SenIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?sen\.com/video/(?P<id>[0-9a-f-]+)'
    _TEST = {
        'url': 'https://www.sen.com/video/eef46eb1-4d79-4e28-be9d-bd937767f8c4',
        'md5': 'ff615aca9691053c94f8f10d96cd7884',
        'info_dict': {
            'id': 'eef46eb1-4d79-4e28-be9d-bd937767f8c4',
            'ext': 'mp4',
            'description': 'Florida, 28 Sep 2022',
            'title': 'Hurricane Ian',
            'tags': ['North America', 'Storm', 'Weather'],
        },
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        api_data = self._download_json(f'https://api.sen.com/content/public/video/{video_id}', video_id)
        m3u8_url = (traverse_obj(api_data, (
            'data', 'nodes', lambda _, v: v['id'] == 'player', 'video', 'url', {url_or_none}, any))
            or f'https://vod.sen.com/videos/{video_id}/manifest.m3u8')

        return {
            'id': video_id,
            'formats': self._extract_m3u8_formats(m3u8_url, video_id, 'mp4'),
            **traverse_obj(api_data, ('data', 'nodes', lambda _, v: v['id'] == 'details', any, 'content', {
                'title': ('title', 'text', {str}),
                'description': ('descriptions', 0, 'text', {str}),
                'tags': ('badges', ..., 'text', {str}),
            })),
        }

yt-dlp-2024.09.27/yt_dlp/extractor/senalcolombia.py

from .common import InfoExtractor
from .rtvcplay import RTVCKalturaIE


class SenalColombiaLiveIE(InfoExtractor):
    _WORKING = False
    _VALID_URL = r'https?://(?:www\.)?senalcolombia\.tv/(?P<id>senal-en-vivo)'

    _TESTS = [{
        'url': 'https://www.senalcolombia.tv/senal-en-vivo',
        'info_dict': {
            'id': 'indexSC',
            'title': 're:^Señal Colombia',
            'description': 'md5:799f16a401d97f40c33a2c6a3e2a507b',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'live_status': 'is_live',
            'ext': 'mp4',
        },
        'params': {
            'skip_download': 'Livestream',
        },
    }]

    def _real_extract(self, url):
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        hydration = self._search_json(
            r'<script\b[^>]*data-drupal-selector\s*=\s*"[^"]*drupal-settings-json[^"]*"[^>]*>',
            webpage, 'hydration', display_id)

        return self.url_result(hydration['envivosrc'], RTVCKalturaIE, display_id)

yt-dlp-2024.09.27/yt_dlp/extractor/senategov.py

import re
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    parse_qs,
    unsmuggle_url,
)

_COMMITTEES = {
    'ag': ('76440', 'http://ag-f.akamaihd.net'),
    'aging': ('76442', 'http://aging-f.akamaihd.net'),
    'approps': ('76441', 'http://approps-f.akamaihd.net'),
    'arch': ('', 'http://ussenate-f.akamaihd.net'),
    'armed': ('76445', 'http://armed-f.akamaihd.net'),
    'banking': ('76446', 'http://banking-f.akamaihd.net'),
    'budget': ('76447', 'http://budget-f.akamaihd.net'),
    'cecc': ('76486', 'http://srs-f.akamaihd.net'),
    'commerce': ('80177', 'http://commerce1-f.akamaihd.net'),
    'csce': ('75229', 'http://srs-f.akamaihd.net'),
    'dpc': ('76590', 'http://dpc-f.akamaihd.net'),
    'energy': ('76448', 'http://energy-f.akamaihd.net'),
    'epw': ('76478', 'http://epw-f.akamaihd.net'),
    'ethics': ('76449', 'http://ethics-f.akamaihd.net'),
    'finance': ('76450', 'http://finance-f.akamaihd.net'),
    'foreign': ('76451', 'http://foreign-f.akamaihd.net'),
    'govtaff': ('76453', 'http://govtaff-f.akamaihd.net'),
    'help': ('76452', 'http://help-f.akamaihd.net'),
    'indian': ('76455', 'http://indian-f.akamaihd.net'),
    'intel': ('76456', 'http://intel-f.akamaihd.net'),
    'intlnarc': ('76457', 'http://intlnarc-f.akamaihd.net'),
    'jccic': ('85180', 'http://jccic-f.akamaihd.net'),
    'jec': ('76458', 'http://jec-f.akamaihd.net'),
    'judiciary': ('76459', 'http://judiciary-f.akamaihd.net'),
    'rpc': ('76591', 'http://rpc-f.akamaihd.net'),
    'rules': ('76460', 'http://rules-f.akamaihd.net'),
    'saa': ('76489', 'http://srs-f.akamaihd.net'),
    'smbiz': ('76461', 'http://smbiz-f.akamaihd.net'),
    'srs': ('75229', 'http://srs-f.akamaihd.net'),
    'uscc': ('76487', 'http://srs-f.akamaihd.net'),
    'vetaff': ('76462', 'http://vetaff-f.akamaihd.net'),
}


class SenateISVPIE(InfoExtractor):
    _IE_NAME = 'senate.gov:isvp'
    _VALID_URL = r'https?://(?:www\.)?senate\.gov/isvp/?\?(?P<qs>.+)'
    _EMBED_REGEX = [r"<iframe[^>]+src=['\"](?P<url>https?://www\.senate\.gov/isvp/?\?[^'\"]+)['\"]"]

    _TESTS = [{
        'url': 'http://www.senate.gov/isvp/?comm=judiciary&type=live&stt=&filename=judiciary031715&auto_play=false&wmode=transparent&poster=http%3A%2F%2Fwww.judiciary.senate.gov%2Fthemes%2Fjudiciary%2Fimages%2Fvideo-poster-flash-fit.png',
        'info_dict': {
            'id': 'judiciary031715',
            'ext': 'mp4',
            'title': 'Integrated Senate Video Player',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)$',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.senate.gov/isvp/?type=live&comm=commerce&filename=commerce011514.mp4&auto_play=false',
        'info_dict': {
            'id': 'commerce011514',
            'ext': 'mp4',
            'title': 'Integrated Senate Video Player',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'http://www.senate.gov/isvp/?type=arch&comm=intel&filename=intel090613&hc_location=ufi',
        # checksum differs each time
        'info_dict': {
            'id': 'intel090613',
            'ext': 'mp4',
            'title': 'Integrated Senate Video Player',
        },
    }, {
        # From http://www.c-span.org/video/?96791-1
        'url': 'http://www.senate.gov/isvp?type=live&comm=banking&filename=banking012715',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})

        qs = urllib.parse.parse_qs(self._match_valid_url(url).group('qs'))
        if not qs.get('filename') or not qs.get('type') or not qs.get('comm'):
            raise ExtractorError('Invalid URL', expected=True)

        video_id = re.sub(r'.mp4$', '', qs['filename'][0])

        webpage = self._download_webpage(url, video_id)
        if smuggled_data.get('force_title'):
            title = smuggled_data['force_title']
        else:
            title = self._html_extract_title(webpage)
        poster = qs.get('poster')
        thumbnail = poster[0] if poster else None

        video_type = qs['type'][0]
        committee = video_type if video_type == 'arch' else qs['comm'][0]

        stream_num, domain = _COMMITTEES[committee]

        formats = []
        if video_type == 'arch':
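            # Archived streams are plain HLS; the filename value may already carry the .mp4 suffix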
            filename = video_id if '.' in video_id else video_id + '.mp4'
            m3u8_url = urllib.parse.urljoin(domain, 'i/' + filename + '/master.m3u8')
            formats = self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8')
        else:
            hdcore_sign = 'hdcore=3.1.0'
            url_params = (domain, video_id, stream_num)
            f4m_url = f'%s/z/%s_1@%s/manifest.f4m?{hdcore_sign}' % url_params
            m3u8_url = '{}/i/{}_1@{}/master.m3u8'.format(*url_params)
            for entry in self._extract_f4m_formats(f4m_url, video_id, f4m_id='f4m'):
                # URLs without the extra param induce a 404 error
                entry.update({'extra_param_to_segment_url': hdcore_sign})
                formats.append(entry)
            for entry in self._extract_m3u8_formats(m3u8_url, video_id, ext='mp4', m3u8_id='m3u8'):
                mobj = re.search(r'(?P<tag>(?:-p|-b)).m3u8', entry['url'])
                if mobj:
                    entry['format_id'] += mobj.group('tag')
                formats.append(entry)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
        }


class SenateGovIE(InfoExtractor):
    _IE_NAME = 'senate.gov'
    _VALID_URL = r'https?:\/\/(?:www\.)?(help|appropriations|judiciary|banking|armed-services|finance)\.senate\.gov'
    _TESTS = [{
        'url': 'https://www.help.senate.gov/hearings/vaccines-saving-lives-ensuring-confidence-and-protecting-public-health',
        'info_dict': {
            'id': 'help090920',
            'display_id': 'vaccines-saving-lives-ensuring-confidence-and-protecting-public-health',
            'title': 'Vaccines: Saving Lives, Ensuring Confidence, and Protecting Public Health',
            'description': 'The U.S. Senate Committee on Health, Education, Labor & Pensions',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.appropriations.senate.gov/hearings/watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD',
        'info_dict': {
            'id': 'appropsA051518',
            'display_id': 'watch?hearingid=B8A25434-5056-A066-6020-1F68CB75F0CD',
            'title': 'Review of the FY2019 Budget Request for the U.S. Army',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.banking.senate.gov/hearings/21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization',
        'info_dict': {
            'id': 'banking041521',
            'display_id': '21st-century-communities-public-transportation-infrastructure-investment-and-fast-act-reauthorization',
            'title': '21st Century Communities: Public Transportation Infrastructure Investment and FAST Act Reauthorization',
            'description': 'The Official website of The United States Committee on Banking, Housing, and Urban Affairs',
            'ext': 'mp4',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        display_id = self._generic_id(url)
        webpage = self._download_webpage(url, display_id)
        parse_info = parse_qs(self._search_regex(
            r'