mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-18 15:55:30 +02:00
Compare commits
47 Commits
415b4c9f95
...
ba090caeaa
Author | SHA1 | Date | |
---|---|---|---|
![]() |
ba090caeaa | ||
![]() |
339614a173 | ||
![]() |
aa863ddab9 | ||
![]() |
db162b76f6 | ||
![]() |
e3c605a61f | ||
![]() |
97ddfefeb4 | ||
![]() |
a8bf0011bd | ||
![]() |
13e5516271 | ||
![]() |
03dba2012d | ||
![]() |
5d96527be8 | ||
![]() |
1fd0e88b67 | ||
![]() |
231349786e | ||
![]() |
f37d599a69 | ||
![]() |
9e38b273b7 | ||
![]() |
4e7c1ea346 | ||
![]() |
e1b6062f8c | ||
![]() |
c723c4e5e7 | ||
![]() |
148a1eb4c5 | ||
![]() |
85c8a405e3 | ||
![]() |
943083edcd | ||
![]() |
3fe72e9eea | ||
![]() |
d30a49742c | ||
![]() |
6d265388c6 | ||
![]() |
a9b3700698 | ||
![]() |
201812100f | ||
![]() |
cc749a8a3b | ||
![]() |
f7bbf5a617 | ||
![]() |
b5be29fa58 | ||
![]() |
6121559e02 | ||
![]() |
2e5bf002da | ||
![]() |
6693d66033 | ||
![]() |
b094747e93 | ||
![]() |
98f8eec956 | ||
![]() |
0daddc780d | ||
![]() |
2d7949d564 | ||
![]() |
ed108b3ea4 | ||
![]() |
eee90acc47 | ||
![]() |
711c5d5d09 | ||
![]() |
89c1b349ad | ||
![]() |
0ee1102268 | ||
![]() |
7794374de8 | ||
![]() |
538eb30567 | ||
![]() |
f8051e3a61 | ||
![]() |
52f9729c9a | ||
![]() |
1a8a03ea8d | ||
![]() |
e0d6c08229 | ||
![]() |
53ea743a9c |
11
.github/workflows/build.yml
vendored
11
.github/workflows/build.yml
vendored
@ -256,7 +256,7 @@ jobs:
|
||||
with:
|
||||
path: |
|
||||
~/yt-dlp-build-venv
|
||||
key: cache-reqs-${{ github.job }}
|
||||
key: cache-reqs-${{ github.job }}-${{ github.ref }}
|
||||
|
||||
- name: Install Requirements
|
||||
run: |
|
||||
@ -331,19 +331,16 @@ jobs:
|
||||
if: steps.restore-cache.outputs.cache-hit == 'true'
|
||||
env:
|
||||
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
cache_key: cache-reqs-${{ github.job }}
|
||||
repository: ${{ github.repository }}
|
||||
branch: ${{ github.ref }}
|
||||
cache_key: cache-reqs-${{ github.job }}-${{ github.ref }}
|
||||
run: |
|
||||
gh extension install actions/gh-actions-cache
|
||||
gh actions-cache delete "${cache_key}" -R "${repository}" -B "${branch}" --confirm
|
||||
gh cache delete "${cache_key}"
|
||||
|
||||
- name: Cache requirements
|
||||
uses: actions/cache/save@v4
|
||||
with:
|
||||
path: |
|
||||
~/yt-dlp-build-venv
|
||||
key: cache-reqs-${{ github.job }}
|
||||
key: cache-reqs-${{ github.job }}-${{ github.ref }}
|
||||
|
||||
macos_legacy:
|
||||
needs: process
|
||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -105,6 +105,8 @@ README.txt
|
||||
*.zsh
|
||||
*.spec
|
||||
test/testdata/sigs/player-*.js
|
||||
test/testdata/thumbnails/empty.webp
|
||||
test/testdata/thumbnails/foo\ %d\ bar/foo_%d.*
|
||||
|
||||
# Binary
|
||||
/youtube-dl
|
||||
|
@ -775,3 +775,7 @@ GeoffreyFrogeye
|
||||
Pawka
|
||||
v3DJG6GL
|
||||
yozel
|
||||
brian6932
|
||||
iednod55
|
||||
maxbin123
|
||||
nullpos
|
||||
|
55
Changelog.md
55
Changelog.md
@ -4,6 +4,61 @@
|
||||
# To create a release, dispatch the https://github.com/yt-dlp/yt-dlp/actions/workflows/release.yml workflow on master
|
||||
-->
|
||||
|
||||
### 2025.06.09
|
||||
|
||||
#### Extractor changes
|
||||
- [Improve JSON LD thumbnails extraction](https://github.com/yt-dlp/yt-dlp/commit/85c8a405e3651dc041b758f4744d4fb3c4c55e01) ([#13368](https://github.com/yt-dlp/yt-dlp/issues/13368)) by [bashonly](https://github.com/bashonly), [doe1080](https://github.com/doe1080)
|
||||
- **10play**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/6d265388c6e943419ac99e9151cf75a3265f980f) ([#13349](https://github.com/yt-dlp/yt-dlp/issues/13349)) by [bashonly](https://github.com/bashonly)
|
||||
- **adobepass**
|
||||
- [Add Fubo MSO](https://github.com/yt-dlp/yt-dlp/commit/eee90acc47d7f8de24afaa8b0271ccaefdf6e88c) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123)
|
||||
- [Always add newer user-agent when required](https://github.com/yt-dlp/yt-dlp/commit/0ee1102268cf31b07f8a8318a47424c66b2f7378) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix Philo MSO authentication](https://github.com/yt-dlp/yt-dlp/commit/943083edcd3df45aaa597a6967bc6c95b720f54c) ([#13335](https://github.com/yt-dlp/yt-dlp/issues/13335)) by [Sipherdrakon](https://github.com/Sipherdrakon)
|
||||
- [Rework to require software statement](https://github.com/yt-dlp/yt-dlp/commit/711c5d5d098fee2992a1a624b1c4b30364b91426) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly), [maxbin123](https://github.com/maxbin123)
|
||||
- [Validate login URL before sending credentials](https://github.com/yt-dlp/yt-dlp/commit/89c1b349ad81318d9d3bea76c01c891696e58d38) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
|
||||
- **aenetworks**
|
||||
- [Fix playlist extractors](https://github.com/yt-dlp/yt-dlp/commit/f37d599a697e82fe68b423865897d55bae34f373) ([#13408](https://github.com/yt-dlp/yt-dlp/issues/13408)) by [Sipherdrakon](https://github.com/Sipherdrakon)
|
||||
- [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/6693d6603358ae6beca834dbd822a7917498b813) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123)
|
||||
- **bilibilibangumi**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/13e55162719528d42d2133e16b65ff59a667a6e4) ([#13416](https://github.com/yt-dlp/yt-dlp/issues/13416)) by [c-basalt](https://github.com/c-basalt)
|
||||
- **brightcove**: new: [Adapt to new AdobePass requirement](https://github.com/yt-dlp/yt-dlp/commit/98f8eec956e3b16cb66a3d49cc71af3807db795e) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
|
||||
- **cu.ntv.co.jp**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/aa863ddab9b1d104678e9cf39bb76f5b14fca660) ([#13302](https://github.com/yt-dlp/yt-dlp/issues/13302)) by [doe1080](https://github.com/doe1080), [nullpos](https://github.com/nullpos)
|
||||
- **go**: [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/2e5bf002dad16f5ce35aa2023d392c9e518fcd8f) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly), [maxbin123](https://github.com/maxbin123)
|
||||
- **nbc**: [Rework and adapt extractors to new AdobePass flow](https://github.com/yt-dlp/yt-dlp/commit/2d7949d5642bc37d1e71bf00c9a55260e5505d58) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
|
||||
- **nobelprize**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/97ddfefeb4faba6e61cd80996c16952b8eab16f3) ([#13205](https://github.com/yt-dlp/yt-dlp/issues/13205)) by [doe1080](https://github.com/doe1080)
|
||||
- **odnoklassniki**: [Detect and raise when login is required](https://github.com/yt-dlp/yt-dlp/commit/148a1eb4c59e127965396c7a6e6acf1979de459e) ([#13361](https://github.com/yt-dlp/yt-dlp/issues/13361)) by [bashonly](https://github.com/bashonly)
|
||||
- **patreon**: [Fix m3u8 formats extraction](https://github.com/yt-dlp/yt-dlp/commit/e0d6c0822930f6e63f574d46d946a58b73ecd10c) ([#13266](https://github.com/yt-dlp/yt-dlp/issues/13266)) by [bashonly](https://github.com/bashonly) (With fixes in [1a8a03e](https://github.com/yt-dlp/yt-dlp/commit/1a8a03ea8d827107319a18076ee3505090667c5a))
|
||||
- **podchaser**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/538eb305673c26bff6a2b12f1c96375fe02ce41a) ([#13271](https://github.com/yt-dlp/yt-dlp/issues/13271)) by [bashonly](https://github.com/bashonly)
|
||||
- **sr**: mediathek: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/e3c605a61f4cc2de9059f37434fa108c3c20f58e) ([#13294](https://github.com/yt-dlp/yt-dlp/issues/13294)) by [doe1080](https://github.com/doe1080)
|
||||
- **stacommu**: [Avoid partial stream formats](https://github.com/yt-dlp/yt-dlp/commit/5d96527be80dc1ed1702d9cd548ff86de570ad70) ([#13412](https://github.com/yt-dlp/yt-dlp/issues/13412)) by [bashonly](https://github.com/bashonly)
|
||||
- **startrek**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/a8bf0011bde92b3f1324a98bfbd38932fd3ebe18) ([#13188](https://github.com/yt-dlp/yt-dlp/issues/13188)) by [doe1080](https://github.com/doe1080)
|
||||
- **svt**: play: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/e1b6062f8c4a3fa33c65269d48d09ec78de765a2) ([#13329](https://github.com/yt-dlp/yt-dlp/issues/13329)) by [barsnick](https://github.com/barsnick), [bashonly](https://github.com/bashonly)
|
||||
- **telecinco**: [Fix extractor](https://github.com/yt-dlp/yt-dlp/commit/03dba2012d9bd3f402fa8c2f122afba89bbd22a4) ([#13379](https://github.com/yt-dlp/yt-dlp/issues/13379)) by [bashonly](https://github.com/bashonly)
|
||||
- **theplatform**: [Improve metadata extraction](https://github.com/yt-dlp/yt-dlp/commit/ed108b3ea481c6a4b5215a9302ba92d74baa2425) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
|
||||
- **toutiao**: [Add extractor](https://github.com/yt-dlp/yt-dlp/commit/f8051e3a61686c5db1de5f5746366ecfbc3ad20c) ([#13246](https://github.com/yt-dlp/yt-dlp/issues/13246)) by [doe1080](https://github.com/doe1080)
|
||||
- **turner**: [Adapt extractors to new AdobePass flow](https://github.com/yt-dlp/yt-dlp/commit/0daddc780d3ac5bebc3a3ec5b884d9243cbc0745) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
|
||||
- **twitcasting**: [Fix password-protected livestream support](https://github.com/yt-dlp/yt-dlp/commit/52f9729c9a92ad4656d746ff0b1acecb87b3e96d) ([#13097](https://github.com/yt-dlp/yt-dlp/issues/13097)) by [bashonly](https://github.com/bashonly)
|
||||
- **twitter**: broadcast: [Support events URLs](https://github.com/yt-dlp/yt-dlp/commit/7794374de8afb20499b023107e2abfd4e6b93ee4) ([#13248](https://github.com/yt-dlp/yt-dlp/issues/13248)) by [doe1080](https://github.com/doe1080)
|
||||
- **umg**: de: [Rework extractor](https://github.com/yt-dlp/yt-dlp/commit/4e7c1ea346b510280218b47e8653dbbca3a69870) ([#13373](https://github.com/yt-dlp/yt-dlp/issues/13373)) by [doe1080](https://github.com/doe1080)
|
||||
- **vice**: [Mark extractors as broken](https://github.com/yt-dlp/yt-dlp/commit/6121559e027a04574690799c1776bc42bb51af31) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [bashonly](https://github.com/bashonly)
|
||||
- **vimeo**: [Extract subtitles from player subdomain](https://github.com/yt-dlp/yt-dlp/commit/c723c4e5e78263df178dbe69844a3d05f3ef9e35) ([#13350](https://github.com/yt-dlp/yt-dlp/issues/13350)) by [bashonly](https://github.com/bashonly)
|
||||
- **watchespn**: [Fix provider-locked content extraction](https://github.com/yt-dlp/yt-dlp/commit/b094747e93cfb0a2c53007120e37d0d84d41f030) ([#13131](https://github.com/yt-dlp/yt-dlp/issues/13131)) by [maxbin123](https://github.com/maxbin123)
|
||||
- **weverse**: [Support login with oauth refresh tokens](https://github.com/yt-dlp/yt-dlp/commit/3fe72e9eea38d9a58211cde42cfaa577ce020e2c) ([#13284](https://github.com/yt-dlp/yt-dlp/issues/13284)) by [bashonly](https://github.com/bashonly)
|
||||
- **youtube**
|
||||
- [Add `tv_simply` player client](https://github.com/yt-dlp/yt-dlp/commit/1fd0e88b67db53ad163393d6965f68e908fa70e3) ([#13389](https://github.com/yt-dlp/yt-dlp/issues/13389)) by [gamer191](https://github.com/gamer191)
|
||||
- [Extract srt subtitles](https://github.com/yt-dlp/yt-dlp/commit/231349786e8c42089c2e079ec94c0ea866c37999) ([#13411](https://github.com/yt-dlp/yt-dlp/issues/13411)) by [gamer191](https://github.com/gamer191)
|
||||
- [Fix `--mark-watched` support](https://github.com/yt-dlp/yt-dlp/commit/b5be29fa58ec98226e11621fd9c58585bcff6879) ([#13222](https://github.com/yt-dlp/yt-dlp/issues/13222)) by [brian6932](https://github.com/brian6932), [iednod55](https://github.com/iednod55)
|
||||
- [Fix automatic captions for some client combinations](https://github.com/yt-dlp/yt-dlp/commit/53ea743a9c158f8ca2d75a09ca44ba68606042d8) ([#13268](https://github.com/yt-dlp/yt-dlp/issues/13268)) by [bashonly](https://github.com/bashonly)
|
||||
- [Improve signature extraction debug output](https://github.com/yt-dlp/yt-dlp/commit/d30a49742cfa22e61c47df4ac0e7334d648fb85d) ([#13327](https://github.com/yt-dlp/yt-dlp/issues/13327)) by [bashonly](https://github.com/bashonly)
|
||||
- [Rework nsig function name extraction](https://github.com/yt-dlp/yt-dlp/commit/9e38b273b7ac942e7e9fc05a651ed810ab7d30ba) ([#13403](https://github.com/yt-dlp/yt-dlp/issues/13403)) by [Grub4K](https://github.com/Grub4K)
|
||||
- [nsig code improvements and cleanup](https://github.com/yt-dlp/yt-dlp/commit/f7bbf5a617f9ab54ef51eaef99be36e175b5e9c3) ([#13280](https://github.com/yt-dlp/yt-dlp/issues/13280)) by [bashonly](https://github.com/bashonly)
|
||||
- **zdf**: [Fix language extraction and format sorting](https://github.com/yt-dlp/yt-dlp/commit/db162b76f6bdece50babe2e0cacfe56888c2e125) ([#13313](https://github.com/yt-dlp/yt-dlp/issues/13313)) by [InvalidUsernameException](https://github.com/InvalidUsernameException)
|
||||
|
||||
#### Misc. changes
|
||||
- **build**
|
||||
- [Exclude `pkg_resources` from being collected](https://github.com/yt-dlp/yt-dlp/commit/cc749a8a3b8b6e5c05318868c72a403f376a1b38) ([#13320](https://github.com/yt-dlp/yt-dlp/issues/13320)) by [bashonly](https://github.com/bashonly)
|
||||
- [Fix macOS requirements caching](https://github.com/yt-dlp/yt-dlp/commit/201812100f315c6727a4418698d5b4e8a79863d4) ([#13328](https://github.com/yt-dlp/yt-dlp/issues/13328)) by [bashonly](https://github.com/bashonly)
|
||||
- **cleanup**: Miscellaneous: [339614a](https://github.com/yt-dlp/yt-dlp/commit/339614a173c74b42d63e858c446a9cae262a13af) by [bashonly](https://github.com/bashonly)
|
||||
- **test**: postprocessors: [Remove binary thumbnail test data](https://github.com/yt-dlp/yt-dlp/commit/a9b370069838e84d44ac7ad095d657003665885a) ([#13341](https://github.com/yt-dlp/yt-dlp/issues/13341)) by [bashonly](https://github.com/bashonly)
|
||||
|
||||
### 2025.05.22
|
||||
|
||||
#### Core changes
|
||||
|
5
Makefile
5
Makefile
@ -18,10 +18,11 @@ pypi-files: AUTHORS Changelog.md LICENSE README.md README.txt supportedsites \
|
||||
tar pypi-files lazy-extractors install uninstall
|
||||
|
||||
clean-test:
|
||||
rm -rf test/testdata/sigs/player-*.js tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
|
||||
rm -rf tmp/ *.annotations.xml *.aria2 *.description *.dump *.frag \
|
||||
*.frag.aria2 *.frag.urls *.info.json *.live_chat.json *.meta *.part* *.tmp *.temp *.unknown_video *.ytdl \
|
||||
*.3gp *.ape *.ass *.avi *.desktop *.f4v *.flac *.flv *.gif *.jpeg *.jpg *.lrc *.m4a *.m4v *.mhtml *.mkv *.mov *.mp3 *.mp4 \
|
||||
*.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp
|
||||
*.mpg *.mpga *.oga *.ogg *.opus *.png *.sbv *.srt *.ssa *.swf *.tt *.ttml *.url *.vtt *.wav *.webloc *.webm *.webp \
|
||||
test/testdata/sigs/player-*.js test/testdata/thumbnails/empty.webp "test/testdata/thumbnails/foo %d bar/foo_%d."*
|
||||
clean-dist:
|
||||
rm -rf yt-dlp.1.temp.md yt-dlp.1 README.txt MANIFEST build/ dist/ .coverage cover/ yt-dlp.tar.gz completions/ \
|
||||
yt_dlp/extractor/lazy_extractors.py *.spec CONTRIBUTING.md.tmp yt-dlp yt-dlp.exe yt_dlp.egg-info/ AUTHORS
|
||||
|
@ -1795,9 +1795,9 @@ Note: In CLI, `ARG` can use `-` instead of `_`; e.g. `youtube:player-client"` be
|
||||
The following extractors use this feature:
|
||||
|
||||
#### youtube
|
||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube/_base.py](https://github.com/yt-dlp/yt-dlp/blob/415b4c9f955b1a0391204bd24a7132590e7b3bdb/yt_dlp/extractor/youtube/_base.py#L402-L409) for the list of supported content language codes
|
||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
|
||||
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv`, `tv_simply` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
|
||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
|
||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||
* `player_js_variant`: The player javascript variant to use for signature and nsig deciphering. The known variants are: `main`, `tce`, `tv`, `tv_es6`, `phone`, `tablet`. Only `main` is recommended as a possible workaround; the others are for debugging purposes. The default is to use what is prescribed by the site, and can be selected with `actual`
|
||||
|
@ -2,6 +2,7 @@
|
||||
set -e
|
||||
|
||||
source ~/.local/share/pipx/venvs/pyinstaller/bin/activate
|
||||
python -m devscripts.install_deps -o --include build
|
||||
python -m devscripts.install_deps --include secretstorage --include curl-cffi
|
||||
python -m devscripts.make_lazy_extractors
|
||||
python devscripts/update-version.py -c "${channel}" -r "${origin}" "${version}"
|
||||
|
@ -36,6 +36,9 @@ def main():
|
||||
f'--name={name}',
|
||||
'--icon=devscripts/logo.ico',
|
||||
'--upx-exclude=vcruntime140.dll',
|
||||
# Ref: https://github.com/yt-dlp/yt-dlp/issues/13311
|
||||
# https://github.com/pyinstaller/pyinstaller/issues/9149
|
||||
'--exclude-module=pkg_resources',
|
||||
'--noconfirm',
|
||||
'--additional-hooks-dir=yt_dlp/__pyinstaller',
|
||||
*opts,
|
||||
|
@ -65,7 +65,7 @@ build = [
|
||||
"build",
|
||||
"hatchling",
|
||||
"pip",
|
||||
"setuptools>=71.0.2", # 71.0.0 broke pyinstaller
|
||||
"setuptools>=71.0.2,<81", # See https://github.com/pyinstaller/pyinstaller/issues/9149
|
||||
"wheel",
|
||||
]
|
||||
dev = [
|
||||
|
@ -5,6 +5,8 @@ If a site is not listed here, it might still be supported by yt-dlp's embed extr
|
||||
Not all sites listed here are guaranteed to work; websites are constantly changing and sometimes this breaks yt-dlp's support for them.
|
||||
The only reliable way to check if a site is supported is to try it.
|
||||
|
||||
- **10play**: [*10play*](## "netrc machine")
|
||||
- **10play:season**
|
||||
- **17live**
|
||||
- **17live:clip**
|
||||
- **17live:vod**
|
||||
@ -295,7 +297,7 @@ The only reliable way to check if a site is supported is to try it.
|
||||
- **CNNIndonesia**
|
||||
- **ComedyCentral**
|
||||
- **ComedyCentralTV**
|
||||
- **ConanClassic**
|
||||
- **ConanClassic**: (**Currently broken**)
|
||||
- **CondeNast**: Condé Nast media group: Allure, Architectural Digest, Ars Technica, Bon Appétit, Brides, Condé Nast, Condé Nast Traveler, Details, Epicurious, GQ, Glamour, Golf Digest, SELF, Teen Vogue, The New Yorker, Vanity Fair, Vogue, W Magazine, WIRED
|
||||
- **CONtv**
|
||||
- **CookingChannel**
|
||||
@ -317,7 +319,7 @@ The only reliable way to check if a site is supported is to try it.
|
||||
- **CtsNews**: 華視新聞
|
||||
- **CTV**
|
||||
- **CTVNews**
|
||||
- **cu.ntv.co.jp**: Nippon Television Network
|
||||
- **cu.ntv.co.jp**: 日テレ無料TADA!
|
||||
- **CultureUnplugged**
|
||||
- **curiositystream**: [*curiositystream*](## "netrc machine")
|
||||
- **curiositystream:collections**: [*curiositystream*](## "netrc machine")
|
||||
@ -882,19 +884,19 @@ The only reliable way to check if a site is supported is to try it.
|
||||
- **Naver**
|
||||
- **Naver:live**
|
||||
- **navernow**
|
||||
- **nba**
|
||||
- **nba:channel**
|
||||
- **nba:embed**
|
||||
- **nba:watch**
|
||||
- **nba:watch:collection**
|
||||
- **nba:watch:embed**
|
||||
- **nba**: (**Currently broken**)
|
||||
- **nba:channel**: (**Currently broken**)
|
||||
- **nba:embed**: (**Currently broken**)
|
||||
- **nba:watch**: (**Currently broken**)
|
||||
- **nba:watch:collection**: (**Currently broken**)
|
||||
- **nba:watch:embed**: (**Currently broken**)
|
||||
- **NBC**
|
||||
- **NBCNews**
|
||||
- **nbcolympics**
|
||||
- **nbcolympics:stream**
|
||||
- **NBCSports**
|
||||
- **NBCSportsStream**
|
||||
- **NBCSportsVPlayer**
|
||||
- **nbcolympics:stream**: (**Currently broken**)
|
||||
- **NBCSports**: (**Currently broken**)
|
||||
- **NBCSportsStream**: (**Currently broken**)
|
||||
- **NBCSportsVPlayer**: (**Currently broken**)
|
||||
- **NBCStations**
|
||||
- **ndr**: NDR.de - Norddeutscher Rundfunk
|
||||
- **ndr:embed**
|
||||
@ -970,7 +972,7 @@ The only reliable way to check if a site is supported is to try it.
|
||||
- **Nitter**
|
||||
- **njoy**: N-JOY
|
||||
- **njoy:embed**
|
||||
- **NobelPrize**: (**Currently broken**)
|
||||
- **NobelPrize**
|
||||
- **NoicePodcast**
|
||||
- **NonkTube**
|
||||
- **NoodleMagazine**
|
||||
@ -1393,14 +1395,14 @@ The only reliable way to check if a site is supported is to try it.
|
||||
- **SpreakerShow**
|
||||
- **SpringboardPlatform**
|
||||
- **SproutVideo**
|
||||
- **sr:mediathek**: Saarländischer Rundfunk (**Currently broken**)
|
||||
- **sr:mediathek**: Saarländischer Rundfunk
|
||||
- **SRGSSR**
|
||||
- **SRGSSRPlay**: srf.ch, rts.ch, rsi.ch, rtr.ch and swissinfo.ch play sites
|
||||
- **StacommuLive**: [*stacommu*](## "netrc machine")
|
||||
- **StacommuVOD**: [*stacommu*](## "netrc machine")
|
||||
- **StagePlusVODConcert**: [*stageplus*](## "netrc machine")
|
||||
- **stanfordoc**: Stanford Open ClassRoom
|
||||
- **StarTrek**: (**Currently broken**)
|
||||
- **startrek**: STAR TREK
|
||||
- **startv**
|
||||
- **Steam**
|
||||
- **SteamCommunityBroadcast**
|
||||
@ -1423,12 +1425,11 @@ The only reliable way to check if a site is supported is to try it.
|
||||
- **SunPorno**
|
||||
- **sverigesradio:episode**
|
||||
- **sverigesradio:publication**
|
||||
- **SVT**
|
||||
- **SVTPage**
|
||||
- **SVTPlay**: SVT Play and Öppet arkiv
|
||||
- **SVTSeries**
|
||||
- **svt:page**
|
||||
- **svt:play**: SVT Play and Öppet arkiv
|
||||
- **svt:play:series**
|
||||
- **SwearnetEpisode**
|
||||
- **Syfy**: (**Currently broken**)
|
||||
- **Syfy**
|
||||
- **SYVDK**
|
||||
- **SztvHu**
|
||||
- **t-online.de**: (**Currently broken**)
|
||||
@ -1472,8 +1473,6 @@ The only reliable way to check if a site is supported is to try it.
|
||||
- **Telewebion**: (**Currently broken**)
|
||||
- **Tempo**
|
||||
- **TennisTV**: [*tennistv*](## "netrc machine")
|
||||
- **TenPlay**: [*10play*](## "netrc machine")
|
||||
- **TenPlaySeason**
|
||||
- **TF1**
|
||||
- **TFO**
|
||||
- **theatercomplextown:ppv**: [*theatercomplextown*](## "netrc machine")
|
||||
@ -1511,6 +1510,7 @@ The only reliable way to check if a site is supported is to try it.
|
||||
- **tokfm:podcast**
|
||||
- **ToonGoggles**
|
||||
- **tou.tv**: [*toutv*](## "netrc machine")
|
||||
- **toutiao**: 今日头条
|
||||
- **Toypics**: Toypics video (**Currently broken**)
|
||||
- **ToypicsUser**: Toypics user profile (**Currently broken**)
|
||||
- **TrailerAddict**: (**Currently broken**)
|
||||
@ -1600,7 +1600,7 @@ The only reliable way to check if a site is supported is to try it.
|
||||
- **UKTVPlay**
|
||||
- **UlizaPlayer**
|
||||
- **UlizaPortal**: ulizaportal.jp
|
||||
- **umg:de**: Universal Music Deutschland (**Currently broken**)
|
||||
- **umg:de**: Universal Music Deutschland
|
||||
- **Unistra**
|
||||
- **Unity**: (**Currently broken**)
|
||||
- **uol.com.br**
|
||||
@ -1623,9 +1623,9 @@ The only reliable way to check if a site is supported is to try it.
|
||||
- **VGTV**: VGTV, BTTV, FTV, Aftenposten and Aftonbladet
|
||||
- **vh1.com**
|
||||
- **vhx:embed**: [*vimeo*](## "netrc machine")
|
||||
- **vice**
|
||||
- **vice:article**
|
||||
- **vice:show**
|
||||
- **vice**: (**Currently broken**)
|
||||
- **vice:article**: (**Currently broken**)
|
||||
- **vice:show**: (**Currently broken**)
|
||||
- **Viddler**
|
||||
- **Videa**
|
||||
- **video.arnes.si**: Arnes Video
|
||||
|
@ -314,6 +314,20 @@ class TestInfoExtractor(unittest.TestCase):
|
||||
},
|
||||
{},
|
||||
),
|
||||
(
|
||||
# test thumbnail_url key without URL scheme
|
||||
r'''
|
||||
<script type="application/ld+json">
|
||||
{
|
||||
"@context": "https://schema.org",
|
||||
"@type": "VideoObject",
|
||||
"thumbnail_url": "//www.nobelprize.org/images/12693-landscape-medium-gallery.jpg"
|
||||
}</script>''',
|
||||
{
|
||||
'thumbnails': [{'url': 'https://www.nobelprize.org/images/12693-landscape-medium-gallery.jpg'}],
|
||||
},
|
||||
{},
|
||||
),
|
||||
]
|
||||
for html, expected_dict, search_json_ld_kwargs in _TESTS:
|
||||
expect_dict(
|
||||
|
@ -8,6 +8,8 @@ import unittest
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
|
||||
import subprocess
|
||||
|
||||
from yt_dlp import YoutubeDL
|
||||
from yt_dlp.utils import shell_quote
|
||||
from yt_dlp.postprocessor import (
|
||||
@ -47,7 +49,18 @@ class TestConvertThumbnail(unittest.TestCase):
|
||||
print('Skipping: ffmpeg not found')
|
||||
return
|
||||
|
||||
file = 'test/testdata/thumbnails/foo %d bar/foo_%d.{}'
|
||||
test_data_dir = 'test/testdata/thumbnails'
|
||||
generated_file = f'{test_data_dir}/empty.webp'
|
||||
|
||||
subprocess.check_call([
|
||||
pp.executable, '-y', '-f', 'lavfi', '-i', 'color=c=black:s=320x320',
|
||||
'-c:v', 'libwebp', '-pix_fmt', 'yuv420p', '-vframes', '1', generated_file,
|
||||
], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
|
||||
|
||||
file = test_data_dir + '/foo %d bar/foo_%d.{}'
|
||||
initial_file = file.format('webp')
|
||||
os.replace(generated_file, initial_file)
|
||||
|
||||
tests = (('webp', 'png'), ('png', 'jpg'))
|
||||
|
||||
for inp, out in tests:
|
||||
@ -55,11 +68,13 @@ class TestConvertThumbnail(unittest.TestCase):
|
||||
if os.path.exists(out_file):
|
||||
os.remove(out_file)
|
||||
pp.convert_thumbnail(file.format(inp), out)
|
||||
assert os.path.exists(out_file)
|
||||
self.assertTrue(os.path.exists(out_file))
|
||||
|
||||
for _, out in tests:
|
||||
os.remove(file.format(out))
|
||||
|
||||
os.remove(initial_file)
|
||||
|
||||
|
||||
class TestExec(unittest.TestCase):
|
||||
def test_parse_cmd(self):
|
||||
@ -610,3 +625,7 @@ outpoint 10.000000
|
||||
self.assertEqual(
|
||||
r"'special '\'' characters '\'' galore'\'\'\'",
|
||||
self._pp._quote_for_ffmpeg("special ' characters ' galore'''"))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
@ -11,7 +11,7 @@ class TestGetWebPoContentBinding:
|
||||
|
||||
@pytest.mark.parametrize('client_name, context, is_authenticated, expected', [
|
||||
*[(client, context, is_authenticated, expected) for client in [
|
||||
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
|
||||
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'TVHTML5_SIMPLY']
|
||||
for context, is_authenticated, expected in [
|
||||
(PoTokenContext.GVS, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)),
|
||||
(PoTokenContext.PLAYER, False, ('example-video-id', ContentBindingType.VIDEO_ID)),
|
||||
|
@ -49,7 +49,7 @@ class TestWebPoPCSP:
|
||||
|
||||
@pytest.mark.parametrize('client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected', [
|
||||
*[(client, context, is_authenticated, remote_host, source_address, request_proxy, expected) for client in [
|
||||
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
|
||||
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'TVHTML5_SIMPLY']
|
||||
for context, is_authenticated, remote_host, source_address, request_proxy, expected in [
|
||||
(PoTokenContext.GVS, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}),
|
||||
(PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'video_id'}),
|
||||
|
@ -320,6 +320,14 @@ _NSIG_TESTS = [
|
||||
'https://www.youtube.com/s/player/59b252b9/player_ias.vflset/en_US/base.js',
|
||||
'D3XWVpYgwhLLKNK4AGX', 'aZrQ1qWJ5yv5h',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js',
|
||||
'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
|
||||
),
|
||||
(
|
||||
'https://www.youtube.com/s/player/fc2a56a5/tv-player-ias.vflset/tv-player-ias.js',
|
||||
'qTKWg_Il804jd2kAC', 'OtUAm2W6gyzJjB9u',
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
|
BIN
test/testdata/thumbnails/foo %d bar/foo_%d.webp
vendored
BIN
test/testdata/thumbnails/foo %d bar/foo_%d.webp
vendored
Binary file not shown.
Before Width: | Height: | Size: 3.8 KiB |
0
test/testdata/thumbnails/foo %d bar/placeholder
vendored
Normal file
0
test/testdata/thumbnails/foo %d bar/placeholder
vendored
Normal file
@ -490,7 +490,7 @@ class YoutubeDL:
|
||||
The template is mapped on a dictionary with keys 'progress' and 'info'
|
||||
retry_sleep_functions: Dictionary of functions that takes the number of attempts
|
||||
as argument and returns the time to sleep in seconds.
|
||||
Allowed keys are 'http', 'fragment', 'file_access'
|
||||
Allowed keys are 'http', 'fragment', 'file_access', 'extractor'
|
||||
download_ranges: A callback function that gets called for every video with
|
||||
the signature (info_dict, ydl) -> Iterable[Section].
|
||||
Only the returned sections will be downloaded.
|
||||
|
@ -300,7 +300,6 @@ from .brainpop import (
|
||||
BrainPOPIlIE,
|
||||
BrainPOPJrIE,
|
||||
)
|
||||
from .bravotv import BravoTVIE
|
||||
from .breitbart import BreitBartIE
|
||||
from .brightcove import (
|
||||
BrightcoveLegacyIE,
|
||||
@ -1262,6 +1261,7 @@ from .nba import (
|
||||
)
|
||||
from .nbc import (
|
||||
NBCIE,
|
||||
BravoTVIE,
|
||||
NBCNewsIE,
|
||||
NBCOlympicsIE,
|
||||
NBCOlympicsStreamIE,
|
||||
@ -1269,6 +1269,7 @@ from .nbc import (
|
||||
NBCSportsStreamIE,
|
||||
NBCSportsVPlayerIE,
|
||||
NBCStationsIE,
|
||||
SyfyIE,
|
||||
)
|
||||
from .ndr import (
|
||||
NDRIE,
|
||||
@ -2016,13 +2017,11 @@ from .sverigesradio import (
|
||||
SverigesRadioPublicationIE,
|
||||
)
|
||||
from .svt import (
|
||||
SVTIE,
|
||||
SVTPageIE,
|
||||
SVTPlayIE,
|
||||
SVTSeriesIE,
|
||||
)
|
||||
from .swearnet import SwearnetEpisodeIE
|
||||
from .syfy import SyfyIE
|
||||
from .syvdk import SYVDKIE
|
||||
from .sztvhu import SztvHuIE
|
||||
from .tagesschau import TagesschauIE
|
||||
@ -2147,6 +2146,7 @@ from .toggle import (
|
||||
from .toggo import ToggoIE
|
||||
from .tonline import TOnlineIE
|
||||
from .toongoggles import ToonGogglesIE
|
||||
from .toutiao import ToutiaoIE
|
||||
from .toutv import TouTvIE
|
||||
from .toypics import (
|
||||
ToypicsIE,
|
||||
|
@ -3,6 +3,7 @@ import json
|
||||
import re
|
||||
import time
|
||||
import urllib.parse
|
||||
import uuid
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from .common import InfoExtractor
|
||||
@ -10,6 +11,7 @@ from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
NO_DEFAULT,
|
||||
ExtractorError,
|
||||
parse_qs,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
@ -45,6 +47,8 @@ MSO_INFO = {
|
||||
'name': 'Comcast XFINITY',
|
||||
'username_field': 'user',
|
||||
'password_field': 'passwd',
|
||||
'login_hostname': 'login.xfinity.com',
|
||||
'needs_newer_ua': True,
|
||||
},
|
||||
'TWC': {
|
||||
'name': 'Time Warner Cable | Spectrum',
|
||||
@ -74,6 +78,12 @@ MSO_INFO = {
|
||||
'name': 'Verizon FiOS',
|
||||
'username_field': 'IDToken1',
|
||||
'password_field': 'IDToken2',
|
||||
'login_hostname': 'ssoauth.verizon.com',
|
||||
},
|
||||
'Fubo': {
|
||||
'name': 'Fubo',
|
||||
'username_field': 'username',
|
||||
'password_field': 'password',
|
||||
},
|
||||
'Cablevision': {
|
||||
'name': 'Optimum/Cablevision',
|
||||
@ -1338,6 +1348,7 @@ MSO_INFO = {
|
||||
'name': 'Sling TV',
|
||||
'username_field': 'username',
|
||||
'password_field': 'password',
|
||||
'login_hostname': 'identity.sling.com',
|
||||
},
|
||||
'Suddenlink': {
|
||||
'name': 'Suddenlink',
|
||||
@ -1355,7 +1366,6 @@ MSO_INFO = {
|
||||
class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should end with BaseIE/InfoExtractor
|
||||
_SERVICE_PROVIDER_TEMPLATE = 'https://sp.auth.adobe.com/adobe-services/%s'
|
||||
_USER_AGENT = 'Mozilla/5.0 (X11; Linux i686; rv:47.0) Gecko/20100101 Firefox/47.0'
|
||||
_MODERN_USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0'
|
||||
_MVPD_CACHE = 'ap-mvpd'
|
||||
|
||||
_DOWNLOADING_LOGIN_PAGE = 'Downloading Provider Login Page'
|
||||
@ -1367,6 +1377,14 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
return super()._download_webpage_handle(
|
||||
*args, **kwargs)
|
||||
|
||||
@staticmethod
|
||||
def _get_mso_headers(mso_info):
|
||||
# yt-dlp's default user-agent is usually too old for some MSO's like Comcast_SSO
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
|
||||
return {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:131.0) Gecko/20100101 Firefox/131.0',
|
||||
} if mso_info.get('needs_newer_ua') else {}
|
||||
|
||||
@staticmethod
|
||||
def _get_mvpd_resource(provider_id, title, guid, rating):
|
||||
channel = etree.Element('channel')
|
||||
@ -1382,7 +1400,13 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
resource_rating.text = rating
|
||||
return '<rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">' + etree.tostring(channel).decode() + '</rss>'
|
||||
|
||||
def _extract_mvpd_auth(self, url, video_id, requestor_id, resource):
|
||||
def _extract_mvpd_auth(self, url, video_id, requestor_id, resource, software_statement):
|
||||
mso_id = self.get_param('ap_mso')
|
||||
if mso_id:
|
||||
mso_info = MSO_INFO[mso_id]
|
||||
else:
|
||||
mso_info = {}
|
||||
|
||||
def xml_text(xml_str, tag):
|
||||
return self._search_regex(
|
||||
f'<{tag}>(.+?)</{tag}>', xml_str, tag)
|
||||
@ -1391,15 +1415,27 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
token_expires = unified_timestamp(re.sub(r'[_ ]GMT', '', xml_text(token, date_ele)))
|
||||
return token_expires and token_expires <= int(time.time())
|
||||
|
||||
def post_form(form_page_res, note, data={}):
|
||||
def post_form(form_page_res, note, data={}, validate_url=False):
|
||||
form_page, urlh = form_page_res
|
||||
post_url = self._html_search_regex(r'<form[^>]+action=(["\'])(?P<url>.+?)\1', form_page, 'post url', group='url')
|
||||
if not re.match(r'https?://', post_url):
|
||||
post_url = urllib.parse.urljoin(urlh.url, post_url)
|
||||
if validate_url:
|
||||
# This request is submitting credentials so we should validate it when possible
|
||||
url_parsed = urllib.parse.urlparse(post_url)
|
||||
expected_hostname = mso_info.get('login_hostname')
|
||||
if expected_hostname and expected_hostname != url_parsed.hostname:
|
||||
raise ExtractorError(
|
||||
f'Unexpected login URL hostname; expected "{expected_hostname}" but got '
|
||||
f'"{url_parsed.hostname}". Aborting before submitting credentials')
|
||||
if url_parsed.scheme != 'https':
|
||||
self.write_debug('Upgrading login URL scheme to https')
|
||||
post_url = urllib.parse.urlunparse(url_parsed._replace(scheme='https'))
|
||||
form_data = self._hidden_inputs(form_page)
|
||||
form_data.update(data)
|
||||
return self._download_webpage_handle(
|
||||
post_url, video_id, note, data=urlencode_postdata(form_data), headers={
|
||||
**self._get_mso_headers(mso_info),
|
||||
'Content-Type': 'application/x-www-form-urlencoded',
|
||||
})
|
||||
|
||||
@ -1432,40 +1468,72 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
}
|
||||
|
||||
guid = xml_text(resource, 'guid') if '<' in resource else resource
|
||||
count = 0
|
||||
while count < 2:
|
||||
for _ in range(2):
|
||||
requestor_info = self.cache.load(self._MVPD_CACHE, requestor_id) or {}
|
||||
authn_token = requestor_info.get('authn_token')
|
||||
if authn_token and is_expired(authn_token, 'simpleTokenExpires'):
|
||||
authn_token = None
|
||||
if not authn_token:
|
||||
mso_id = self.get_param('ap_mso')
|
||||
if mso_id:
|
||||
username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
|
||||
if not username or not password:
|
||||
raise_mvpd_required()
|
||||
mso_info = MSO_INFO[mso_id]
|
||||
|
||||
provider_redirect_page_res = self._download_webpage_handle(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
|
||||
'Downloading Provider Redirect Page', query={
|
||||
'noflash': 'true',
|
||||
'mso_id': mso_id,
|
||||
'requestor_id': requestor_id,
|
||||
'no_iframe': 'false',
|
||||
'domain_name': 'adobe.com',
|
||||
'redirect_url': url,
|
||||
}, headers={
|
||||
# yt-dlp's default user-agent is usually too old for Comcast_SSO
|
||||
# See: https://github.com/yt-dlp/yt-dlp/issues/10848
|
||||
'User-Agent': self._MODERN_USER_AGENT,
|
||||
} if mso_id == 'Comcast_SSO' else None)
|
||||
elif not self._cookies_passed:
|
||||
if not mso_id:
|
||||
raise_mvpd_required()
|
||||
username, password = self._get_login_info('ap_username', 'ap_password', mso_id)
|
||||
if not username or not password:
|
||||
raise_mvpd_required()
|
||||
|
||||
if not mso_id:
|
||||
pass
|
||||
elif mso_id == 'Comcast_SSO':
|
||||
device_info, urlh = self._download_json_handle(
|
||||
'https://sp.auth.adobe.com/indiv/devices',
|
||||
video_id, 'Registering device with Adobe',
|
||||
data=json.dumps({'fingerprint': uuid.uuid4().hex}).encode(),
|
||||
headers={'Content-Type': 'application/json; charset=UTF-8'})
|
||||
|
||||
device_id = device_info['deviceId']
|
||||
mvpd_headers['pass_sfp'] = urlh.get_header('pass_sfp')
|
||||
mvpd_headers['Ap_21'] = device_id
|
||||
|
||||
registration = self._download_json(
|
||||
'https://sp.auth.adobe.com/o/client/register',
|
||||
video_id, 'Registering client with Adobe',
|
||||
data=json.dumps({'software_statement': software_statement}).encode(),
|
||||
headers={'Content-Type': 'application/json; charset=UTF-8'})
|
||||
|
||||
access_token = self._download_json(
|
||||
'https://sp.auth.adobe.com/o/client/token', video_id,
|
||||
'Obtaining access token', data=urlencode_postdata({
|
||||
'grant_type': 'client_credentials',
|
||||
'client_id': registration['client_id'],
|
||||
'client_secret': registration['client_secret'],
|
||||
}),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
})['access_token']
|
||||
mvpd_headers['Authorization'] = f'Bearer {access_token}'
|
||||
|
||||
reg_code = self._download_json(
|
||||
f'https://sp.auth.adobe.com/reggie/v1/{requestor_id}/regcode',
|
||||
video_id, 'Obtaining registration code',
|
||||
data=urlencode_postdata({
|
||||
'requestor': requestor_id,
|
||||
'deviceId': device_id,
|
||||
'format': 'json',
|
||||
}),
|
||||
headers={
|
||||
'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
|
||||
'Authorization': f'Bearer {access_token}',
|
||||
})['code']
|
||||
|
||||
provider_redirect_page_res = self._download_webpage_handle(
|
||||
self._SERVICE_PROVIDER_TEMPLATE % 'authenticate/saml', video_id,
|
||||
'Downloading Provider Redirect Page', query={
|
||||
'noflash': 'true',
|
||||
'mso_id': mso_id,
|
||||
'requestor_id': requestor_id,
|
||||
'no_iframe': 'false',
|
||||
'domain_name': 'adobe.com',
|
||||
'redirect_url': url,
|
||||
'reg_code': reg_code,
|
||||
}, headers=self._get_mso_headers(mso_info))
|
||||
|
||||
if mso_id == 'Comcast_SSO':
|
||||
# Comcast page flow varies by video site and whether you
|
||||
# are on Comcast's network.
|
||||
provider_redirect_page, urlh = provider_redirect_page_res
|
||||
@ -1489,8 +1557,8 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
oauth_redirect_url = extract_redirect_url(
|
||||
provider_redirect_page, fatal=True)
|
||||
provider_login_page_res = self._download_webpage_handle(
|
||||
oauth_redirect_url, video_id,
|
||||
self._DOWNLOADING_LOGIN_PAGE)
|
||||
oauth_redirect_url, video_id, self._DOWNLOADING_LOGIN_PAGE,
|
||||
headers=self._get_mso_headers(mso_info))
|
||||
else:
|
||||
provider_login_page_res = post_form(
|
||||
provider_redirect_page_res,
|
||||
@ -1500,24 +1568,35 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
provider_login_page_res, 'Logging in', {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password,
|
||||
})
|
||||
}, validate_url=True)
|
||||
mvpd_confirm_page, urlh = mvpd_confirm_page_res
|
||||
if '<button class="submit" value="Resume">Resume</button>' in mvpd_confirm_page:
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
elif mso_id == 'Philo':
|
||||
# Philo has very unique authentication method
|
||||
self._download_webpage(
|
||||
'https://idp.philo.com/auth/init/login_code', video_id, 'Requesting auth code', data=urlencode_postdata({
|
||||
self._request_webpage(
|
||||
'https://idp.philo.com/auth/init/login_code', video_id,
|
||||
'Requesting Philo auth code', data=json.dumps({
|
||||
'ident': username,
|
||||
'device': 'web',
|
||||
'send_confirm_link': False,
|
||||
'send_token': True,
|
||||
}))
|
||||
'device_ident': f'web-{uuid.uuid4().hex}',
|
||||
'include_login_link': True,
|
||||
}).encode(), headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Accept': 'application/json',
|
||||
})
|
||||
|
||||
philo_code = getpass.getpass('Type auth code you have received [Return]: ')
|
||||
self._download_webpage(
|
||||
'https://idp.philo.com/auth/update/login_code', video_id, 'Submitting token', data=urlencode_postdata({
|
||||
'token': philo_code,
|
||||
}))
|
||||
self._request_webpage(
|
||||
'https://idp.philo.com/auth/update/login_code', video_id,
|
||||
'Submitting token', data=json.dumps({'token': philo_code}).encode(),
|
||||
headers={
|
||||
'Content-Type': 'application/json',
|
||||
'Accept': 'application/json',
|
||||
})
|
||||
|
||||
mvpd_confirm_page_res = self._download_webpage_handle('https://idp.philo.com/idp/submit', video_id, 'Confirming Philo Login')
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
elif mso_id == 'Verizon':
|
||||
@ -1539,7 +1618,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
provider_redirect_page_res, 'Logging in', {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password,
|
||||
})
|
||||
}, validate_url=True)
|
||||
saml_login_page, urlh = saml_login_page_res
|
||||
if 'Please try again.' in saml_login_page:
|
||||
raise ExtractorError(
|
||||
@ -1560,7 +1639,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
[saml_login_page, saml_redirect_url], 'Logging in', {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password,
|
||||
})
|
||||
}, validate_url=True)
|
||||
if 'Please try again.' in saml_login_page:
|
||||
raise ExtractorError(
|
||||
'Failed to login, incorrect User ID or Password.')
|
||||
@ -1631,7 +1710,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
provider_login_page_res, 'Logging in', {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password,
|
||||
})
|
||||
}, validate_url=True)
|
||||
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_association_redirect, url=urlh.url)
|
||||
@ -1682,7 +1761,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
provider_login_page_res, 'Logging in', {
|
||||
mso_info['username_field']: username,
|
||||
mso_info['password_field']: password,
|
||||
})
|
||||
}, validate_url=True)
|
||||
|
||||
provider_refresh_redirect_url = extract_redirect_url(
|
||||
provider_association_redirect, url=urlh.url)
|
||||
@ -1699,6 +1778,27 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
query=hidden_data)
|
||||
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
elif mso_id == 'Fubo':
|
||||
_, urlh = provider_redirect_page_res
|
||||
|
||||
fubo_response = self._download_json(
|
||||
'https://api.fubo.tv/partners/tve/connect', video_id,
|
||||
'Authenticating with Fubo', 'Unable to authenticate with Fubo',
|
||||
query=parse_qs(urlh.url), data=json.dumps({
|
||||
'username': username,
|
||||
'password': password,
|
||||
}).encode(), headers={
|
||||
'Accept': 'application/json',
|
||||
'Content-Type': 'application/json',
|
||||
})
|
||||
|
||||
self._request_webpage(
|
||||
'https://sp.auth.adobe.com/adobe-services/oauth2', video_id,
|
||||
'Authenticating with Adobe', 'Failed to authenticate with Adobe',
|
||||
query={
|
||||
'code': fubo_response['code'],
|
||||
'state': fubo_response['state'],
|
||||
})
|
||||
else:
|
||||
# Some providers (e.g. DIRECTV NOW) have another meta refresh
|
||||
# based redirect that should be followed.
|
||||
@ -1717,7 +1817,8 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
}
|
||||
if mso_id in ('Cablevision', 'AlticeOne'):
|
||||
form_data['_eventId_proceed'] = ''
|
||||
mvpd_confirm_page_res = post_form(provider_login_page_res, 'Logging in', form_data)
|
||||
mvpd_confirm_page_res = post_form(
|
||||
provider_login_page_res, 'Logging in', form_data, validate_url=True)
|
||||
if mso_id != 'Rogers':
|
||||
post_form(mvpd_confirm_page_res, 'Confirming Login')
|
||||
|
||||
@ -1727,6 +1828,7 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
'Retrieving Session', data=urlencode_postdata({
|
||||
'_method': 'GET',
|
||||
'requestor_id': requestor_id,
|
||||
'reg_code': reg_code,
|
||||
}), headers=mvpd_headers)
|
||||
except ExtractorError as e:
|
||||
if not mso_id and isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
@ -1734,7 +1836,6 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
raise
|
||||
if '<pendingLogout' in session:
|
||||
self.cache.store(self._MVPD_CACHE, requestor_id, {})
|
||||
count += 1
|
||||
continue
|
||||
authn_token = unescapeHTML(xml_text(session, 'authnToken'))
|
||||
requestor_info['authn_token'] = authn_token
|
||||
@ -1755,7 +1856,6 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
}), headers=mvpd_headers)
|
||||
if '<pendingLogout' in authorize:
|
||||
self.cache.store(self._MVPD_CACHE, requestor_id, {})
|
||||
count += 1
|
||||
continue
|
||||
if '<error' in authorize:
|
||||
raise ExtractorError(xml_text(authorize, 'details'), expected=True)
|
||||
@ -1778,6 +1878,5 @@ class AdobePassIE(InfoExtractor): # XXX: Conventionally, base classes should en
|
||||
}), headers=mvpd_headers)
|
||||
if '<pendingLogout' in short_authorize:
|
||||
self.cache.store(self._MVPD_CACHE, requestor_id, {})
|
||||
count += 1
|
||||
continue
|
||||
return short_authorize
|
||||
|
@ -84,6 +84,8 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
'skip': '404 Not Found',
|
||||
}]
|
||||
|
||||
_SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwNjg5ZmU2My00OTc5LTQxZmQtYWYxNC1hYjVlNmJjNWVkZWIiLCJuYmYiOjE1MzcxOTA2NzQsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwNjc0fQ.Xl3AEduM0s1TxDQ6-XssdKIiLm261hhsEv1C1yo_nitIajZThSI9rXILqtIzO0aujoHhdzUnu_dUCq9ffiSBzEG632tTa1la-5tegHtce80cMhewBN4n2t8n9O5tiaPx8MPY8ALdm5wS7QzWE6DO_LTJKgE8Bl7Yv-CWJT4q4SywtNiQWLVOuhBRnDyfsRezxRwptw8qTn9dv5ZzUrVJaby5fDZ_nOncMKvegOgaKd5KEuCAGQ-mg-PSuValMjGuf6FwDguGaK7IyI5Y2oOrzXmD4Dj7q4WBg8w9QoZhtLeAU56mcsGILolku2R5FHlVLO9xhjResyt-pfmegOkpSw'
|
||||
|
||||
def _real_extract(self, url):
|
||||
show_path, episode_path = self._match_valid_url(url).groups()
|
||||
display_id = episode_path or show_path
|
||||
@ -152,7 +154,7 @@ class AdultSwimIE(TurnerBaseIE):
|
||||
# CDN_TOKEN_APP_ID from:
|
||||
# https://d2gg02c3xr550i.cloudfront.net/assets/asvp.e9c8bef24322d060ef87.bundle.js
|
||||
'appId': 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcHBJZCI6ImFzLXR2ZS1kZXNrdG9wLXB0enQ2bSIsInByb2R1Y3QiOiJ0dmUiLCJuZXR3b3JrIjoiYXMiLCJwbGF0Zm9ybSI6ImRlc2t0b3AiLCJpYXQiOjE1MzI3MDIyNzl9.BzSCk-WYOZ2GMCIaeVb8zWnzhlgnXuJTCu0jGp_VaZE',
|
||||
}, {
|
||||
}, self._SOFTWARE_STATEMENT, {
|
||||
'url': url,
|
||||
'site_name': 'AdultSwim',
|
||||
'auth_required': auth,
|
||||
|
@ -1,3 +1,5 @@
|
||||
import json
|
||||
|
||||
from .theplatform import ThePlatformIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@ -6,7 +8,6 @@ from ..utils import (
|
||||
remove_start,
|
||||
traverse_obj,
|
||||
update_url_query,
|
||||
urlencode_postdata,
|
||||
)
|
||||
|
||||
|
||||
@ -20,13 +21,13 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
_THEPLATFORM_KEY = '43jXaGRQud'
|
||||
_THEPLATFORM_SECRET = 'S10BPXHMlb'
|
||||
_DOMAIN_MAP = {
|
||||
'history.com': ('HISTORY', 'history'),
|
||||
'aetv.com': ('AETV', 'aetv'),
|
||||
'mylifetime.com': ('LIFETIME', 'lifetime'),
|
||||
'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc'),
|
||||
'fyi.tv': ('FYI', 'fyi'),
|
||||
'historyvault.com': (None, 'historyvault'),
|
||||
'biography.com': (None, 'biography'),
|
||||
'history.com': ('HISTORY', 'history', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1MzZlMTQ3ZS0zMzFhLTQxY2YtYTMwNC01MDA2NzNlOGYwYjYiLCJuYmYiOjE1Mzg2NjMzMDksImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM4NjYzMzA5fQ.n24-FVHLGXJe2D4atIQZ700aiXKIajKh5PWFoHJ40Az4itjtwwSFHnvufnoal3T8lYkwNLxce7H-IEGxIykRkZEdwq09pMKMT-ft9ASzE4vQ8fAWbf5ZgDME86x4Jq_YaxkRc9Ne0eShGhl8fgTJHvk07sfWcol61HJ7kU7K8FzzcHR0ucFQgA5VNd8RyjoGWY7c6VxnXR214LOpXsywmit04-vGJC102b_WA2EQfqI93UzG6M6l0EeV4n0_ijP3s8_i8WMJZ_uwnTafCIY6G_731i01dKXDLSFzG1vYglAwDa8DTcdrAAuIFFDF6QNGItCCmwbhjufjmoeVb7R1Gg'),
|
||||
'aetv.com': ('AETV', 'aetv', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI5Y2IwNjg2Yy03ODUxLTRiZDUtODcyMC00MjNlZTg1YTQ1NzMiLCJuYmYiOjE1Mzg2NjMyOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM4NjYzMjkwfQ.T5Elf0X4TndO4NEgqBas1gDxNHGPVk_daO2Ha5FBzVO6xi3zM7eavdAKfYMCN7gpWYJx03iADaVPtczO_t_aGZczDjpwJHgTUzDgvcLZAVsVDqtDIAMy3S846rPgT6UDbVoxurA7B2VTPm9phjrSXhejvd0LBO8MQL4AZ3sy2VmiPJ2noT1ily5PuHCYlkrT1fheO064duR__Cd9DQ5VTMnKjzY3Cx345CEwKDkUk5gwgxhXM-aY0eblehrq8VD81_aRM_O3tvh7nbTydHOnUpV-k_iKVi49gqz7Sf8zb6Zh5z2Uftn3vYCfE5NQuesitoRMnsH17nW7o_D59hkRgg'),
|
||||
'mylifetime.com': ('LIFETIME', 'lifetime', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJmODg0MDM1ZC1mZGRmLTRmYjgtYmRkMC05MzRhZDdiYTAwYTciLCJuYmYiOjE1NDkzOTI2NDQsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTQ5MzkyNjQ0fQ.vkTIaCpheKdKQd__2-3ec4qkcpbAhyCTvwe5iTl922ItSQfVhpEJG4wseVSNmBTrpBi0hvLedcw6Hj1_UuzBMVuVcCqLprU-pI8recEwL0u7G-eVkylsxe1OTUm1o3V6OykXQ9KlA-QQLL1neUhdhR1n5B1LZ4cmtBmiEpfgf4rFwXD1ScFylIcaWKLBqHoRBNUmxyTmoXXvn_A-GGSj9eCizFzY8W5uBwUcsoiw2Cr1skx7PbB2RSP1I5DsoIJKG-8XV1KS7MWl-fNLjE-hVAsI9znqfEEFcPBiv3LhCP4Nf4OIs7xAselMn0M0c8igRUZhURWX_hdygUAxkbKFtQ'),
|
||||
'fyi.tv': ('FYI', 'fyi', 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIxOGZiOWM3Ny1mYmMzLTQxYTktYmE1Yi1lMzM0ZmUzNzU4NjEiLCJuYmYiOjE1ODc1ODAzNzcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTg3NTgwMzc3fQ.AYDuipKswmIfLBfOjHRsfc5fMV5NmJUmiJnkpiep4VEw9QiXkygFj4bN06Si5tFc5Mee5TDrGzDpV6iuKbVpLT5kuqXhAn-Wozf5zKPsg_IpdEKO7gsiCq4calt72ct44KTqtKD_hVcoxQU24_HaJsRgXzu3B-6Ff6UrmsXkyvYifYVC9v2DSkdCuA02_IrlllzVT2kRuefUXgL4vQRtTFf77uYa0RKSTG7uVkiQ_AU41eXevKlO2qgtc14Hk5cZ7-ZNrDyMCXYA5ngdIHP7Gs9PWaFXT36PFHI_rC4EfxUABPzjQFxjpP75aX5qn8SH__HbM9q3hoPWgaEaf76qIQ'),
|
||||
'lifetimemovieclub.com': ('LIFETIMEMOVIECLUB', 'lmc', None),
|
||||
'historyvault.com': (None, 'historyvault', None),
|
||||
'biography.com': (None, 'biography', None),
|
||||
}
|
||||
|
||||
def _extract_aen_smil(self, smil_url, video_id, auth=None):
|
||||
@ -71,7 +72,7 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
}
|
||||
|
||||
def _extract_aetn_info(self, domain, filter_key, filter_value, url):
|
||||
requestor_id, brand = self._DOMAIN_MAP[domain]
|
||||
requestor_id, brand, software_statement = self._DOMAIN_MAP[domain]
|
||||
result = self._download_json(
|
||||
f'https://feeds.video.aetnd.com/api/v2/{brand}/videos',
|
||||
filter_value, query={f'filter[{filter_key}]': filter_value})
|
||||
@ -95,7 +96,7 @@ class AENetworksBaseIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
theplatform_metadata.get('AETN$PPL_pplProgramId') or theplatform_metadata.get('AETN$PPL_pplProgramId_OLD'),
|
||||
traverse_obj(theplatform_metadata, ('ratings', 0, 'rating')))
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
url, video_id, requestor_id, resource, software_statement)
|
||||
info.update(self._extract_aen_smil(media_url, video_id, auth))
|
||||
info.update({
|
||||
'title': title,
|
||||
@ -132,10 +133,11 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'tags': 'count:14',
|
||||
'categories': ['Mountain Men'],
|
||||
'episode_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'episode': 'Winter Is Coming',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'series': 'Mountain Men',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
@ -157,18 +159,18 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
'thumbnail': r're:^https?://.*\.jpe?g$',
|
||||
'chapters': 'count:4',
|
||||
'tags': 'count:23',
|
||||
'episode': 'Episode 1',
|
||||
'episode': 'Inlawful Entry',
|
||||
'episode_number': 1,
|
||||
'season': 'Season 9',
|
||||
'season_number': 9,
|
||||
'series': 'Duck Dynasty',
|
||||
'age_limit': 0,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': 'This video is only available for users of participating TV providers.',
|
||||
}, {
|
||||
'url': 'http://www.fyi.tv/shows/tiny-house-nation/season-1/episode-8',
|
||||
'only_matching': True,
|
||||
@ -203,18 +205,19 @@ class AENetworksIE(AENetworksBaseIE):
|
||||
class AENetworksListBaseIE(AENetworksBaseIE):
|
||||
def _call_api(self, resource, slug, brand, fields):
|
||||
return self._download_json(
|
||||
'https://yoga.appsvcs.aetnd.com/graphql',
|
||||
slug, query={'brand': brand}, data=urlencode_postdata({
|
||||
'https://yoga.appsvcs.aetnd.com/graphql', slug,
|
||||
query={'brand': brand}, headers={'Content-Type': 'application/json'},
|
||||
data=json.dumps({
|
||||
'query': '''{
|
||||
%s(slug: "%s") {
|
||||
%s
|
||||
}
|
||||
}''' % (resource, slug, fields), # noqa: UP031
|
||||
}))['data'][resource]
|
||||
}).encode())['data'][resource]
|
||||
|
||||
def _real_extract(self, url):
|
||||
domain, slug = self._match_valid_url(url).groups()
|
||||
_, brand = self._DOMAIN_MAP[domain]
|
||||
_, brand, _ = self._DOMAIN_MAP[domain]
|
||||
playlist = self._call_api(self._RESOURCE, slug, brand, self._FIELDS)
|
||||
base_url = f'http://watch.{domain}'
|
||||
|
||||
|
@ -816,6 +816,26 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
'upload_date': '20111104',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}, {
|
||||
'note': 'new playurlSSRData scheme',
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ep678060',
|
||||
'info_dict': {
|
||||
'id': '678060',
|
||||
'ext': 'mp4',
|
||||
'series': '去你家吃饭好吗',
|
||||
'series_id': '6198',
|
||||
'season': '第二季',
|
||||
'season_id': '42542',
|
||||
'season_number': 2,
|
||||
'episode': '吴老二:你家大公鸡养不熟,能煮熟吗…',
|
||||
'episode_id': '678060',
|
||||
'episode_number': 61,
|
||||
'title': '一只小九九丫 吴老二:你家大公鸡养不熟,能煮熟吗…',
|
||||
'duration': 266.123,
|
||||
'timestamp': 1663315904,
|
||||
'upload_date': '20220916',
|
||||
'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.bilibili.com/bangumi/play/ep267851',
|
||||
'info_dict': {
|
||||
@ -879,12 +899,26 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
'Extracting episode', query={'fnval': 12240, 'ep_id': episode_id},
|
||||
headers=headers))
|
||||
|
||||
geo_blocked = traverse_obj(play_info, (
|
||||
'raw', 'data', 'plugins', lambda _, v: v['name'] == 'AreaLimitPanel', 'config', 'is_block', {bool}, any))
|
||||
premium_only = play_info.get('code') == -10403
|
||||
play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
|
||||
|
||||
formats = self.extract_formats(play_info)
|
||||
if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
video_info = traverse_obj(play_info, (('result', ('raw', 'data')), 'video_info', {dict}, any)) or {}
|
||||
formats = self.extract_formats(video_info)
|
||||
|
||||
if not formats:
|
||||
if geo_blocked:
|
||||
self.raise_geo_restricted()
|
||||
elif premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage:
|
||||
self.raise_login_required('This video is for premium members only')
|
||||
|
||||
if traverse_obj(play_info, ((
|
||||
('result', 'play_check', 'play_detail'), # 'PLAY_PREVIEW' vs 'PLAY_WHOLE'
|
||||
('raw', 'data', 'play_video_type'), # 'preview' vs 'whole'
|
||||
), any, {lambda x: x in ('PLAY_PREVIEW', 'preview')})):
|
||||
self.report_warning(
|
||||
'Only preview format is available, '
|
||||
f'you have to become a premium member to access full video. {self._login_hint()}')
|
||||
|
||||
bangumi_info = self._download_json(
|
||||
'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
|
||||
@ -922,7 +956,7 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
|
||||
'season': str_or_none(season_title),
|
||||
'season_id': str_or_none(season_id),
|
||||
'season_number': season_number,
|
||||
'duration': float_or_none(play_info.get('timelength'), scale=1000),
|
||||
'duration': float_or_none(video_info.get('timelength'), scale=1000),
|
||||
'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
|
||||
'__post_extractor': self.extract_comments(aid),
|
||||
'http_headers': {'Referer': url},
|
||||
|
@ -1,188 +0,0 @@
|
||||
from .adobepass import AdobePassIE
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_html_by_class,
|
||||
int_or_none,
|
||||
merge_dicts,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
|
||||
class BravoTVIE(AdobePassIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?P<site>bravotv|oxygen)\.com/(?:[^/]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
|
||||
'info_dict': {
|
||||
'id': '3923059',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Top Chef Season 16 Winner Is...',
|
||||
'description': 'Find out who takes the title of Top Chef!',
|
||||
'upload_date': '20190314',
|
||||
'timestamp': 1552591860,
|
||||
'season_number': 16,
|
||||
'episode_number': 15,
|
||||
'series': 'Top Chef',
|
||||
'episode': 'The Top Chef Season 16 Winner Is...',
|
||||
'duration': 190.357,
|
||||
'season': 'Season 16',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling',
|
||||
'info_dict': {
|
||||
'id': '9000234570',
|
||||
'ext': 'mp4',
|
||||
'title': 'London Calling',
|
||||
'description': 'md5:5af95a8cbac1856bd10e7562f86bb759',
|
||||
'upload_date': '20230310',
|
||||
'timestamp': 1678410000,
|
||||
'season_number': 20,
|
||||
'episode_number': 1,
|
||||
'series': 'Top Chef',
|
||||
'episode': 'London Calling',
|
||||
'duration': 3266.03,
|
||||
'season': 'Season 20',
|
||||
'chapters': 'count:7',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night',
|
||||
'info_dict': {
|
||||
'id': '3692045',
|
||||
'ext': 'mp4',
|
||||
'title': 'Closing Night',
|
||||
'description': 'md5:3170065c5c2f19548d72a4cbc254af63',
|
||||
'upload_date': '20180401',
|
||||
'timestamp': 1522623600,
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
'series': 'In Ice Cold Blood',
|
||||
'episode': 'Closing Night',
|
||||
'duration': 2629.051,
|
||||
'season': 'Season 1',
|
||||
'chapters': 'count:6',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
|
||||
'info_dict': {
|
||||
'id': '3974019',
|
||||
'ext': 'mp4',
|
||||
'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
|
||||
'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5',
|
||||
'upload_date': '20190617',
|
||||
'timestamp': 1560790800,
|
||||
'season_number': 2,
|
||||
'episode_number': 16,
|
||||
'series': 'In Ice Cold Blood',
|
||||
'episode': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
|
||||
'duration': 68.235,
|
||||
'season': 'Season 2',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'age_limit': 14,
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, display_id = self._match_valid_url(url).group('site', 'id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
settings = self._search_json(
|
||||
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>', webpage, 'settings', display_id)
|
||||
tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '')
|
||||
query = {
|
||||
'manifest': 'm3u',
|
||||
'formats': 'm3u,mpeg4',
|
||||
}
|
||||
|
||||
if tve:
|
||||
account_pid = tve.get('data-mpx-media-account-pid') or 'HNK2IC'
|
||||
account_id = tve['data-mpx-media-account-id']
|
||||
metadata = self._parse_json(
|
||||
tve.get('data-normalized-video', ''), display_id, fatal=False, transform_source=unescapeHTML)
|
||||
video_id = tve.get('data-guid') or metadata['guid']
|
||||
if tve.get('data-entitlement') == 'auth':
|
||||
auth = traverse_obj(settings, ('tve_adobe_auth', {dict})) or {}
|
||||
site = remove_end(site, 'tv')
|
||||
release_pid = tve['data-release-pid']
|
||||
resource = self._get_mvpd_resource(
|
||||
tve.get('data-adobe-pass-resource-id') or auth.get('adobePassResourceId') or site,
|
||||
tve['data-title'], release_pid, tve.get('data-rating'))
|
||||
query.update({
|
||||
'switch': 'HLSServiceSecure',
|
||||
'auth': self._extract_mvpd_auth(
|
||||
url, release_pid, auth.get('adobePassRequestorId') or site, resource),
|
||||
})
|
||||
|
||||
else:
|
||||
ls_playlist = traverse_obj(settings, ('ls_playlist', ..., {dict}), get_all=False) or {}
|
||||
account_pid = ls_playlist.get('mpxMediaAccountPid') or 'PHSl-B'
|
||||
account_id = ls_playlist['mpxMediaAccountId']
|
||||
video_id = ls_playlist['defaultGuid']
|
||||
metadata = traverse_obj(
|
||||
ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, {dict}), get_all=False)
|
||||
|
||||
tp_url = f'https://link.theplatform.com/s/{account_pid}/media/guid/{account_id}/{video_id}'
|
||||
tp_metadata = self._download_json(
|
||||
update_url_query(tp_url, {'format': 'preview'}), video_id, fatal=False)
|
||||
|
||||
chapters = traverse_obj(tp_metadata, ('chapters', ..., {
|
||||
'start_time': ('startTime', {float_or_none(scale=1000)}),
|
||||
'end_time': ('endTime', {float_or_none(scale=1000)}),
|
||||
}))
|
||||
# prune pointless single chapters that span the entire duration from short videos
|
||||
if len(chapters) == 1 and not traverse_obj(chapters, (0, 'end_time')):
|
||||
chapters = None
|
||||
|
||||
m3u8_url = self._request_webpage(HEADRequest(
|
||||
update_url_query(f'{tp_url}/stream.m3u8', query)), video_id, 'Checking m3u8 URL').url
|
||||
if 'mpeg_cenc' in m3u8_url:
|
||||
self.report_drm(video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'chapters': chapters,
|
||||
**merge_dicts(traverse_obj(tp_metadata, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'timestamp': ('pubDate', {float_or_none(scale=1000)}),
|
||||
'season_number': (('pl1$seasonNumber', 'nbcu$seasonNumber'), {int_or_none}),
|
||||
'episode_number': (('pl1$episodeNumber', 'nbcu$episodeNumber'), {int_or_none}),
|
||||
'series': (('pl1$show', 'nbcu$show'), (None, ...), {str}),
|
||||
'episode': (('title', 'pl1$episodeNumber', 'nbcu$episodeNumber'), {str_or_none}),
|
||||
'age_limit': ('ratings', ..., 'rating', {parse_age_limit}),
|
||||
}, get_all=False), traverse_obj(metadata, {
|
||||
'title': 'title',
|
||||
'description': 'description',
|
||||
'duration': ('durationInSeconds', {int_or_none}),
|
||||
'timestamp': ('airDate', {unified_timestamp}),
|
||||
'thumbnail': ('thumbnailUrl', {url_or_none}),
|
||||
'season_number': ('seasonNumber', {int_or_none}),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'episode': 'episodeTitle',
|
||||
'series': 'show',
|
||||
})),
|
||||
}
|
@ -923,10 +923,18 @@ class BrightcoveNewIE(BrightcoveNewBaseIE):
|
||||
errors = json_data.get('errors')
|
||||
if errors and errors[0].get('error_subcode') == 'TVE_AUTH':
|
||||
custom_fields = json_data['custom_fields']
|
||||
missing_fields = ', '.join(
|
||||
key for key in ('source_url', 'software_statement') if not smuggled_data.get(key))
|
||||
if missing_fields:
|
||||
raise ExtractorError(
|
||||
f'Missing fields in smuggled data: {missing_fields}. '
|
||||
f'This video can be only extracted from the webpage where it is embedded. '
|
||||
f'Pass the URL of the embedding webpage instead of the Brightcove URL', expected=True)
|
||||
tve_token = self._extract_mvpd_auth(
|
||||
smuggled_data['source_url'], video_id,
|
||||
custom_fields['bcadobepassrequestorid'],
|
||||
custom_fields['bcadobepassresourceid'])
|
||||
custom_fields['bcadobepassresourceid'],
|
||||
smuggled_data['software_statement'])
|
||||
json_data = self._download_json(
|
||||
api_url, video_id, headers={
|
||||
'Accept': f'application/json;pk={policy_key}',
|
||||
|
@ -1675,9 +1675,9 @@ class InfoExtractor:
|
||||
'ext': mimetype2ext(e.get('encodingFormat')),
|
||||
'title': unescapeHTML(e.get('name')),
|
||||
'description': unescapeHTML(e.get('description')),
|
||||
'thumbnails': [{'url': unescapeHTML(url)}
|
||||
for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))
|
||||
if url_or_none(url)],
|
||||
'thumbnails': traverse_obj(e, (('thumbnailUrl', 'thumbnailURL', 'thumbnail_url'), (None, ...), {
|
||||
'url': ({str}, {unescapeHTML}, {self._proto_relative_url}, {url_or_none}),
|
||||
})),
|
||||
'duration': parse_duration(e.get('duration')),
|
||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||
# author can be an instance of 'Organization' or 'Person' types.
|
||||
|
@ -206,7 +206,7 @@ class DouyuTVIE(DouyuBaseIE):
|
||||
'is_live': True,
|
||||
**traverse_obj(room, {
|
||||
'display_id': ('url', {str}, {lambda i: i[1:]}),
|
||||
'title': ('room_name', {unescapeHTML}),
|
||||
'title': ('room_name', {str}, {unescapeHTML}),
|
||||
'description': ('show_details', {str}),
|
||||
'uploader': ('nickname', {str}),
|
||||
'thumbnail': ('room_src', {url_or_none}),
|
||||
|
@ -64,7 +64,7 @@ class DreiSatIE(ZDFBaseIE):
|
||||
'title': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
|
||||
'description': 'md5:bae51bfc22f15563ce3acbf97d2e8844',
|
||||
'duration': 5399.0,
|
||||
'thumbnail': 'https://www.3sat.de/assets/buchmesse-kerkeling-100~original?cb=1743329640903',
|
||||
'thumbnail': 'https://www.3sat.de/assets/buchmesse-kerkeling-100~original?cb=1747256996338',
|
||||
'chapters': 'count:24',
|
||||
'episode': 'dein buch - Das Beste von der Leipziger Buchmesse 2025 - Teil 1',
|
||||
'episode_id': 'POS_1ef236cc-b390-401e-acd0-4fb4b04315fb',
|
||||
|
@ -329,6 +329,7 @@ class WatchESPNIE(AdobePassIE):
|
||||
}]
|
||||
|
||||
_API_KEY = 'ZXNwbiZicm93c2VyJjEuMC4w.ptUt7QxsteaRruuPmGZFaJByOoqKvDP2a5YkInHrc7c'
|
||||
_SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIyZGJmZWM4My03OWE1LTQyNzEtYTVmZC04NTZjYTMxMjRjNjMiLCJuYmYiOjE1NDAyMTI3NjEsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTQwMjEyNzYxfQ.yaK3r4AI2uLVvsyN1GLzqzgzRlxMPtasSaiYYBV0wIstqih5tvjTmeoLmi8Xy9Kp_U7Md-bOffwiyK3srHkpUkhhwXLH2x6RPjmS1tPmhaG7-3LBcHTf2ySPvXhVf7cN4ngldawK4tdtLtsw6rF_JoZE2yaC6XbS2F51nXSFEDDnOQWIHEQRG3aYAj-38P2CLGf7g-Yfhbp5cKXeksHHQ90u3eOO4WH0EAjc9oO47h33U8KMEXxJbvjV5J8Va2G2fQSgLDZ013NBI3kQnE313qgqQh2feQILkyCENpB7g-TVBreAjOaH1fU471htSoGGYepcAXv-UDtpgitDiLy7CQ'
|
||||
|
||||
def _call_bamgrid_api(self, path, video_id, payload=None, headers={}):
|
||||
if 'Authorization' not in headers:
|
||||
@ -405,8 +406,8 @@ class WatchESPNIE(AdobePassIE):
|
||||
|
||||
# TV Provider required
|
||||
else:
|
||||
resource = self._get_mvpd_resource('ESPN', video_data['name'], video_id, None)
|
||||
auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource).encode()
|
||||
resource = self._get_mvpd_resource('espn1', video_data['name'], video_id, None)
|
||||
auth = self._extract_mvpd_auth(url, video_id, 'ESPN', resource, self._SOFTWARE_STATEMENT).encode()
|
||||
|
||||
asset = self._download_json(
|
||||
f'https://watch.auth.api.espn.com/video/auth/media/{video_id}/asset?apikey=uiqlbgzdwuru14v627vdusswb',
|
||||
|
@ -7,161 +7,157 @@ from ..utils import (
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
parse_age_limit,
|
||||
remove_end,
|
||||
remove_start,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
urlencode_postdata,
|
||||
)
|
||||
from ..utils.traversal import traverse_obj
|
||||
|
||||
|
||||
class GoIE(AdobePassIE):
|
||||
_SITE_INFO = {
|
||||
'abc': {
|
||||
'brand': '001',
|
||||
'requestor_id': 'ABC',
|
||||
'requestor_id': 'dtci',
|
||||
'provider_id': 'ABC',
|
||||
'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI4OTcwMjlkYS0yYjM1LTQyOWUtYWQ0NS02ZjZiZjVkZTdhOTUiLCJuYmYiOjE2MjAxNzM5NjksImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNjIwMTczOTY5fQ.SC69DVJWSL8sIe-vVUrP6xS_kzHKqwz9PdKYexs_y-f7Vin6mM-7S-W1TE_-K55O0pyf-TL4xYgvm6LIye8CckG-nZfVwNPV4huduov0jmIcxCQFeUwkHULG2IaA44wfBVUBdaHgkhPweZ2amjycO_IXtez-gBXOLbE3B7Gx9j_5ISCFtyVUblThKfoGyQv6KT6t8Vpmc4ZSKCCQp74KWFFypydb9ucego1taW_nQD06Cdf4yByLd6NaTBceMcIKbug9b9gxFm3XBgJ5q3z7KGo1Kr6XalAV5j4m-fQ91wczlTilX8FM4AljMupyRM9mA_aEADILQ4hS79q4SM0w6w',
|
||||
},
|
||||
'freeform': {
|
||||
'brand': '002',
|
||||
'requestor_id': 'ABCFamily',
|
||||
},
|
||||
'watchdisneychannel': {
|
||||
'brand': '004',
|
||||
'resource_id': 'Disney',
|
||||
},
|
||||
'watchdisneyjunior': {
|
||||
'brand': '008',
|
||||
'resource_id': 'DisneyJunior',
|
||||
},
|
||||
'watchdisneyxd': {
|
||||
'brand': '009',
|
||||
'resource_id': 'DisneyXD',
|
||||
'provider_id': 'ABCFamily',
|
||||
'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhZWM2MGYyNC0xYzRjLTQ1NzQtYjc0Zi03ZmM4N2E5YWMzMzgiLCJuYmYiOjE1ODc2NjU5MjMsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTg3NjY1OTIzfQ.flCn3dhvmvPnWmV0JV8Fm0YFyj07yPez9-n1GFEwVIm_S2wQVWbWyJhqsAyLZVFrhOMZYTqmPS3OHxGwTwXkEYn6PD7o_vIVG3oqi-Xn1m5jRt_Gazw5qEtpat6VE7bvKGSD3ZhcidOrsCk8NcYyq75u61NHDvSl81pcedJjVRVUpsqrEwmo0aVbA0C8PX3ri0mEbGvkMKvHn8E60xp-PSE-VK8SDT0plwPu_TwUszkZ6-_I8_2xcv_WBqcXFkAVg7Q-iNJXgQvmNsrpcrYuLvi6hEH4ZLtoDcXU6MhwTQAJTiHSo8x9aHX1_qFP09CzlNOFQbC2ZEJdP9SvA53SLQ',
|
||||
},
|
||||
'disneynow': {
|
||||
'brand': '011',
|
||||
'brand': '011', # also: '004', '008', '009'
|
||||
'requestor_id': 'DisneyChannels',
|
||||
'provider_id': 'DisneyChannels',
|
||||
'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1MzAzNTRiOS04NDNiLTRkNjAtYTQ3ZS0yNzk1MzlkOTIyNTciLCJuYmYiOjE1NTg5ODc0NDksImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTU4OTg3NDQ5fQ.Jud6YS6-J2h0h6po0oMheDym0qRTJQGj4kzacrz4DFuEwhcBkkykW6pF5pKuAUJy9HCZ40oDAHe2KcTlDJjCZF5tDaUEfdihakZ9cC_rG7MU-QoRne8qaB_dPDKwGuk-ZyWD8eV3zwTJmbGo8hDxYTEU81YNCxwhyc_BPDr5TYiubbmpP3_pTnXmSpuL58isJ2peSKWlX9BacuXtBY25c_QnPFKk-_EETm7IHkTpDazde1QfHWGu4s4yJpKGk8RVVujVG6h6ELlL-ZeYLilBm7iS7h1TYG1u7fJhyZRL7isaom6NvAzsvN3ngss1fLwt8decP8wzdFHrbYTdTjW8qw',
|
||||
'resource_id': 'Disney',
|
||||
},
|
||||
'fxnow.fxnetworks': {
|
||||
'brand': '025',
|
||||
'fxnetworks': {
|
||||
'brand': '025', # also: '020'
|
||||
'requestor_id': 'dtci',
|
||||
'provider_id': 'fx', # also 'fxx', 'fxm'
|
||||
'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIzYWRhYWZiNC02OTAxLTRlYzktOTdmNy1lYWZkZTJkODJkN2EiLCJuYmYiOjE1NjIwMjQwNzYsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTYyMDI0MDc2fQ.dhKMpZK50AObbZYrMiYPSfWtzXHUaeMP3jrIY4Cgfvh0GaEgk0Mns_zp78jypFeZgRtPVleQMQDNq2YEloRLcAGqP1aa6WVDglnK77ZWUm4IKai14Rwf3A6YBhSRoO2_lMmUGkuTf6gZY-kMIPqBYKqzTQiQl4HbniPFodIzFRiuI9QJVrkoyTGrJL4oqiX08PoFI3Z-TOti1Heu3EbFC-GveQHhlinYrzU7rbiAqLEz7FImtfBDsnXX1Y3uJDLYM3Bq4Oh0nrzTv1Fd62wNsCNErHHIbELidh1zZF0ujvt7ReuZUwAitm0UhEJ7OxNOUbEQWtae6pVNscvdvTFMpg',
|
||||
},
|
||||
'nationalgeographic': {
|
||||
'brand': '026', # also '023'
|
||||
'requestor_id': 'dtci',
|
||||
'provider_id': 'ngc', # also 'ngw'
|
||||
'software_statement': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIxMzE4YTM1Ni05Mjc4LTQ4NjEtYTFmNi1jMTIzMzg1ZWMzYzMiLCJuYmYiOjE1NjIwMjM4MjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTYyMDIzODI4fQ.Le-2OzF9-jrhJ7ZfWtLWk5iSHGVZoxeU1w0_fO--Heli0OwRZsRq2slSmx-oZTzxuWmAgDEiBkWSDcDK6sM25DrCLsdsJa3MBuZ-slBRtH8aq3HpNoqqLkU-vg6gRUEKMtwBUtwCu_9aKUCayYtndWv4b1DjVQeSrteOW5NNudWVYleAe0kxeNJQHo5If9SCzDudKVJktFUjhNks4QPOC_uONPkRRlL9D0fNvtOY-LRFckfcHhf5z9l1iZjeukV0YhdKnuw1wyiaWrQXBUDiBfbkCRd2DM-KnelqPxfiXCaTjGKDURRBO3pz33ebge3IFXSiU5vl4qHQ8xvunzGpFw',
|
||||
},
|
||||
}
|
||||
_VALID_URL = r'''(?x)
|
||||
https?://
|
||||
(?P<sub_domain>
|
||||
(?:{}\.)?go|fxnow\.fxnetworks|
|
||||
(?:www\.)?(?:abc|freeform|disneynow)
|
||||
)\.com/
|
||||
(?:
|
||||
(?:[^/]+/)*(?P<id>[Vv][Dd][Kk][Aa]\w+)|
|
||||
(?:[^/]+/)*(?P<display_id>[^/?\#]+)
|
||||
)
|
||||
'''.format(r'\.|'.join(list(_SITE_INFO.keys())))
|
||||
_URL_PATH_RE = r'(?:video|episode|movies-and-specials)/(?P<id>[\da-f]{8}-(?:[\da-f]{4}-){3}[\da-f]{12})'
|
||||
_VALID_URL = [
|
||||
fr'https?://(?:www\.)?(?P<site>abc)\.com/{_URL_PATH_RE}',
|
||||
fr'https?://(?:www\.)?(?P<site>freeform)\.com/{_URL_PATH_RE}',
|
||||
fr'https?://(?:www\.)?(?P<site>disneynow)\.com/{_URL_PATH_RE}',
|
||||
fr'https?://fxnow\.(?P<site>fxnetworks)\.com/{_URL_PATH_RE}',
|
||||
fr'https?://(?:www\.)?(?P<site>nationalgeographic)\.com/tv/{_URL_PATH_RE}',
|
||||
]
|
||||
_TESTS = [{
|
||||
'url': 'http://abc.go.com/shows/designated-survivor/video/most-recent/VDKA3807643',
|
||||
'url': 'https://abc.com/episode/4192c0e6-26e5-47a8-817b-ce8272b9e440/playlist/PL551127435',
|
||||
'info_dict': {
|
||||
'id': 'VDKA3807643',
|
||||
'id': 'VDKA10805898',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Traitor in the White House',
|
||||
'description': 'md5:05b009d2d145a1e85d25111bd37222e8',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'This content is no longer available.',
|
||||
}, {
|
||||
'url': 'https://disneynow.com/shows/big-hero-6-the-series',
|
||||
'info_dict': {
|
||||
'title': 'Doraemon',
|
||||
'id': 'SH55574025',
|
||||
},
|
||||
'playlist_mincount': 51,
|
||||
}, {
|
||||
'url': 'http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood',
|
||||
'info_dict': {
|
||||
'id': 'VDKA3609139',
|
||||
'title': 'This Guilty Blood',
|
||||
'description': 'md5:f18e79ad1c613798d95fdabfe96cd292',
|
||||
'title': 'Switch the Flip',
|
||||
'description': 'To help get Brian’s life in order, Stewie and Brian swap bodies using a machine that Stewie invents.',
|
||||
'age_limit': 14,
|
||||
'duration': 1297,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'series': 'Family Guy',
|
||||
'season': 'Season 16',
|
||||
'season_number': 16,
|
||||
'episode': 'Episode 17',
|
||||
'episode_number': 17,
|
||||
'timestamp': 1746082800.0,
|
||||
'upload_date': '20250501',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://disneynow.com/episode/21029660-ba06-4406-adb0-a9a78f6e265e/playlist/PL553044961',
|
||||
'info_dict': {
|
||||
'id': 'VDKA39546942',
|
||||
'ext': 'mp4',
|
||||
'title': 'Zero Friends Again',
|
||||
'description': 'Relationships fray under the pressures of a difficult journey.',
|
||||
'age_limit': 0,
|
||||
'duration': 1721,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'series': 'Star Wars: Skeleton Crew',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 6',
|
||||
'episode_number': 6,
|
||||
'timestamp': 1746946800.0,
|
||||
'upload_date': '20250511',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://fxnow.fxnetworks.com/episode/09f4fa6f-c293-469e-aebe-32c9ca5842a7/playlist/PL554408064',
|
||||
'info_dict': {
|
||||
'id': 'VDKA38112033',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Return of Jerry',
|
||||
'description': 'The vampires’ long-lost fifth roommate returns. Written by Paul Simms; directed by Kyle Newacheck.',
|
||||
'age_limit': 17,
|
||||
'duration': 1493,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'series': 'What We Do in the Shadows',
|
||||
'season': 'Season 6',
|
||||
'season_number': 6,
|
||||
'episode': 'Episode 1',
|
||||
'upload_date': '20170102',
|
||||
'season': 'Season 2',
|
||||
'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/abcf/Shadowhunters/video/201/ae5f75608d86bf88aa4f9f4aa76ab1b7/579x325-Q100_ae5f75608d86bf88aa4f9f4aa76ab1b7.jpg',
|
||||
'duration': 2544,
|
||||
'season_number': 2,
|
||||
'series': 'Shadowhunters',
|
||||
'episode_number': 1,
|
||||
'timestamp': 1483387200,
|
||||
'ext': 'mp4',
|
||||
},
|
||||
'params': {
|
||||
'geo_bypass_ip_block': '3.244.239.0/24',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'timestamp': 1729573200.0,
|
||||
'upload_date': '20241022',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://abc.com/shows/the-rookie/episode-guide/season-04/12-the-knock',
|
||||
'url': 'https://www.freeform.com/episode/bda0eaf7-761a-4838-aa44-96f794000844/playlist/PL553044961',
|
||||
'info_dict': {
|
||||
'id': 'VDKA26050359',
|
||||
'title': 'The Knock',
|
||||
'description': 'md5:0c2947e3ada4c31f28296db7db14aa64',
|
||||
'age_limit': 14,
|
||||
'id': 'VDKA39007340',
|
||||
'ext': 'mp4',
|
||||
'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/abc/TheRookie/video/412/daf830d06e83b11eaf5c0a299d993ae3/1556x876-Q75_daf830d06e83b11eaf5c0a299d993ae3.jpg',
|
||||
'episode': 'Episode 12',
|
||||
'season_number': 4,
|
||||
'season': 'Season 4',
|
||||
'timestamp': 1642975200,
|
||||
'episode_number': 12,
|
||||
'upload_date': '20220123',
|
||||
'series': 'The Rookie',
|
||||
'duration': 2572,
|
||||
},
|
||||
'params': {
|
||||
'geo_bypass_ip_block': '3.244.239.0/24',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'title': 'Angel\'s Landing',
|
||||
'description': 'md5:91bf084e785c968fab16734df7313446',
|
||||
'age_limit': 14,
|
||||
'duration': 2523,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'series': 'How I Escaped My Cult',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 2',
|
||||
'episode_number': 2,
|
||||
'timestamp': 1740038400.0,
|
||||
'upload_date': '20250220',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://fxnow.fxnetworks.com/shows/better-things/video/vdka12782841',
|
||||
'url': 'https://www.nationalgeographic.com/tv/episode/ca694661-1186-41ae-8089-82f64d69b16d/playlist/PL554408064',
|
||||
'info_dict': {
|
||||
'id': 'VDKA12782841',
|
||||
'title': 'First Look: Better Things - Season 2',
|
||||
'description': 'md5:fa73584a95761c605d9d54904e35b407',
|
||||
'id': 'VDKA39492078',
|
||||
'ext': 'mp4',
|
||||
'age_limit': 14,
|
||||
'upload_date': '20170825',
|
||||
'duration': 161,
|
||||
'series': 'Better Things',
|
||||
'thumbnail': 'http://cdn1.edgedatg.com/aws/v2/fx/BetterThings/video/12782841/b6b05e58264121cc2c98811318e6d507/1556x876-Q75_b6b05e58264121cc2c98811318e6d507.jpg',
|
||||
'timestamp': 1503661074,
|
||||
},
|
||||
'params': {
|
||||
'geo_bypass_ip_block': '3.244.239.0/24',
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'title': 'Heart of the Emperors',
|
||||
'description': 'md5:4fc50a2878f030bb3a7eac9124dca677',
|
||||
'age_limit': 0,
|
||||
'duration': 2775,
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'series': 'Secrets of the Penguins',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Episode 1',
|
||||
'episode_number': 1,
|
||||
'timestamp': 1745204400.0,
|
||||
'upload_date': '20250421',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/the-catch/episode-guide/season-01/10-the-wedding',
|
||||
'url': 'https://www.freeform.com/movies-and-specials/c38281fc-9f8f-47c7-8220-22394f9df2e1',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'http://abc.go.com/shows/world-news-tonight/episode-guide/2017-02/17-021717-intense-stand-off-between-man-with-rifle-and-police-in-oakland',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# brand 004
|
||||
'url': 'http://disneynow.go.com/shows/big-hero-6-the-series/season-01/episode-10-mr-sparkles-loses-his-sparkle/vdka4637915',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
# brand 008
|
||||
'url': 'http://disneynow.go.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://disneynow.com/shows/minnies-bow-toons/video/happy-campers/vdka4872013',
|
||||
'only_matching': True,
|
||||
}, {
|
||||
'url': 'https://www.freeform.com/shows/cruel-summer/episode-guide/season-01/01-happy-birthday-jeanette-turner',
|
||||
'url': 'https://abc.com/video/219a454a-172c-41bf-878a-d169e6bc0bdc/playlist/PL5523098420',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
@ -171,58 +167,29 @@ class GoIE(AdobePassIE):
|
||||
f'http://api.contents.watchabc.go.com/vp2/ws/contents/3000/videos/{brand}/001/-1/{show_id}/-1/{video_id}/-1/-1.json',
|
||||
display_id)['video']
|
||||
|
||||
def _extract_global_var(self, name, webpage, video_id):
|
||||
return self._search_json(
|
||||
fr'window\[["\']{re.escape(name)}["\']\]\s*=',
|
||||
webpage, f'{name.strip("_")} JSON', video_id)
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
sub_domain = remove_start(remove_end(mobj.group('sub_domain') or '', '.go'), 'www.')
|
||||
video_id, display_id = mobj.group('id', 'display_id')
|
||||
site_info = self._SITE_INFO.get(sub_domain, {})
|
||||
brand = site_info.get('brand')
|
||||
if not video_id or not site_info:
|
||||
webpage = self._download_webpage(url, display_id or video_id)
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
r'["\']__abc_com__["\']\s*\]\s*=\s*({.+?})\s*;', webpage,
|
||||
'data', default='{}'),
|
||||
display_id or video_id, fatal=False)
|
||||
# https://abc.com/shows/modern-family/episode-guide/season-01/101-pilot
|
||||
layout = try_get(data, lambda x: x['page']['content']['video']['layout'], dict)
|
||||
video_id = None
|
||||
if layout:
|
||||
video_id = try_get(
|
||||
layout,
|
||||
(lambda x: x['videoid'], lambda x: x['video']['id']),
|
||||
str)
|
||||
if not video_id:
|
||||
video_id = self._search_regex(
|
||||
(
|
||||
# There may be inner quotes, e.g. data-video-id="'VDKA3609139'"
|
||||
# from http://freeform.go.com/shows/shadowhunters/episodes/season-2/1-this-guilty-blood
|
||||
r'data-video-id=["\']*(VDKA\w+)',
|
||||
# page.analytics.videoIdCode
|
||||
r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\w+)',
|
||||
# https://abc.com/shows/the-rookie/episode-guide/season-02/03-the-bet
|
||||
r'\b(?:video)?id["\']\s*:\s*["\'](VDKA\w+)',
|
||||
), webpage, 'video id', default=video_id)
|
||||
if not site_info:
|
||||
brand = self._search_regex(
|
||||
(r'data-brand=\s*["\']\s*(\d+)',
|
||||
r'data-page-brand=\s*["\']\s*(\d+)'), webpage, 'brand',
|
||||
default='004')
|
||||
site_info = next(
|
||||
si for _, si in self._SITE_INFO.items()
|
||||
if si.get('brand') == brand)
|
||||
if not video_id:
|
||||
# show extraction works for Disney, DisneyJunior and DisneyXD
|
||||
# ABC and Freeform has different layout
|
||||
show_id = self._search_regex(r'data-show-id=["\']*(SH\d+)', webpage, 'show id')
|
||||
videos = self._extract_videos(brand, show_id=show_id)
|
||||
show_title = self._search_regex(r'data-show-title="([^"]+)"', webpage, 'show title', fatal=False)
|
||||
entries = []
|
||||
for video in videos:
|
||||
entries.append(self.url_result(
|
||||
video['url'], 'Go', video.get('id'), video.get('title')))
|
||||
entries.reverse()
|
||||
return self.playlist_result(entries, show_id, show_title)
|
||||
site, display_id = self._match_valid_url(url).group('site', 'id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
config = self._extract_global_var('__CONFIG__', webpage, display_id)
|
||||
data = self._extract_global_var(config['globalVar'], webpage, display_id)
|
||||
video_id = traverse_obj(data, (
|
||||
'page', 'content', 'video', 'layout', (('video', 'id'), 'videoid'), {str}, any))
|
||||
if not video_id:
|
||||
video_id = self._search_regex([
|
||||
# data-track-video_id="VDKA39492078"
|
||||
# data-track-video_id_code="vdka39492078"
|
||||
# data-video-id="'VDKA3609139'"
|
||||
r'data-(?:track-)?video[_-]id(?:_code)?=["\']*((?:vdka|VDKA)\d+)',
|
||||
# page.analytics.videoIdCode
|
||||
r'\bvideoIdCode["\']\s*:\s*["\']((?:vdka|VDKA)\d+)'], webpage, 'video ID')
|
||||
|
||||
site_info = self._SITE_INFO[site]
|
||||
brand = site_info['brand']
|
||||
video_data = self._extract_videos(brand, video_id)[0]
|
||||
video_id = video_data['id']
|
||||
title = video_data['title']
|
||||
@ -238,26 +205,31 @@ class GoIE(AdobePassIE):
|
||||
if ext == 'm3u8':
|
||||
video_type = video_data.get('type')
|
||||
data = {
|
||||
'video_id': video_data['id'],
|
||||
'video_id': video_id,
|
||||
'video_type': video_type,
|
||||
'brand': brand,
|
||||
'device': '001',
|
||||
'app_name': 'webplayer-abc',
|
||||
}
|
||||
if video_data.get('accesslevel') == '1':
|
||||
requestor_id = site_info.get('requestor_id', 'DisneyChannels')
|
||||
provider_id = site_info['provider_id']
|
||||
software_statement = traverse_obj(data, ('app', 'config', (
|
||||
('features', 'auth', 'softwareStatement'),
|
||||
('tvAuth', 'SOFTWARE_STATEMENTS', 'PRODUCTION'),
|
||||
), {str}, any)) or site_info['software_statement']
|
||||
resource = site_info.get('resource_id') or self._get_mvpd_resource(
|
||||
requestor_id, title, video_id, None)
|
||||
provider_id, title, video_id, None)
|
||||
auth = self._extract_mvpd_auth(
|
||||
url, video_id, requestor_id, resource)
|
||||
url, video_id, site_info['requestor_id'], resource, software_statement)
|
||||
data.update({
|
||||
'token': auth,
|
||||
'token_type': 'ap',
|
||||
'adobe_requestor_id': requestor_id,
|
||||
'adobe_requestor_id': provider_id,
|
||||
})
|
||||
else:
|
||||
self._initialize_geo_bypass({'countries': ['US']})
|
||||
entitlement = self._download_json(
|
||||
'https://api.entitlement.watchabc.go.com/vp2/ws-secure/entitlement/2020/authorize.json',
|
||||
'https://prod.gatekeeper.us-abc.symphony.edgedatg.go.com/vp2/ws-secure/entitlement/2020/playmanifest_secure.json',
|
||||
video_id, data=urlencode_postdata(data))
|
||||
errors = entitlement.get('errors', {}).get('errors', [])
|
||||
if errors:
|
||||
@ -267,7 +239,7 @@ class GoIE(AdobePassIE):
|
||||
error['message'], countries=['US'])
|
||||
error_message = ', '.join([error['message'] for error in errors])
|
||||
raise ExtractorError(f'{self.IE_NAME} said: {error_message}', expected=True)
|
||||
asset_url += '?' + entitlement['uplynkData']['sessionKey']
|
||||
asset_url += '?' + entitlement['entitlement']['uplynkData']['sessionKey']
|
||||
fmts, subs = self._extract_m3u8_formats_and_subtitles(
|
||||
asset_url, video_id, 'mp4', m3u8_id=format_id or 'hls', fatal=False)
|
||||
formats.extend(fmts)
|
||||
|
@ -1,7 +1,5 @@
|
||||
from .telecinco import TelecincoBaseIE
|
||||
from ..networking.exceptions import HTTPError
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
)
|
||||
@ -81,17 +79,7 @@ class MiTeleIE(TelecincoBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
||||
try: # yt-dlp's default user-agents are too old and blocked by akamai
|
||||
webpage = self._download_webpage(url, display_id, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
|
||||
raise
|
||||
# Retry with impersonation if hardcoded UA is insufficient to bypass akamai
|
||||
webpage = self._download_webpage(url, display_id, impersonate=True)
|
||||
|
||||
webpage = self._download_akamai_webpage(url, display_id)
|
||||
pre_player = self._search_json(
|
||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
|
||||
webpage, 'Pre Player', display_id)['prePlayer']
|
||||
|
@ -19,7 +19,8 @@ from ..utils import (
|
||||
class NBACVPBaseIE(TurnerBaseIE):
|
||||
def _extract_nba_cvp_info(self, path, video_id, fatal=False):
|
||||
return self._extract_cvp_info(
|
||||
f'http://secure.nba.com/{path}', video_id, {
|
||||
# XXX: The 3rd argument (None) needs to be the AdobePass software_statement
|
||||
f'http://secure.nba.com/{path}', video_id, None, {
|
||||
'default': {
|
||||
'media_src': 'http://nba.cdn.turner.com/nba/big',
|
||||
},
|
||||
@ -94,6 +95,7 @@ class NBAWatchBaseIE(NBACVPBaseIE):
|
||||
|
||||
|
||||
class NBAWatchEmbedIE(NBAWatchBaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'nba:watch:embed'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'embed\?.*?\bid=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
@ -115,6 +117,7 @@ class NBAWatchEmbedIE(NBAWatchBaseIE):
|
||||
|
||||
|
||||
class NBAWatchIE(NBAWatchBaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'nba:watch'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'(?:nba/)?video/(?P<id>.+?(?=/index\.html)|(?:[^/]+/)*[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
@ -167,6 +170,7 @@ class NBAWatchIE(NBAWatchBaseIE):
|
||||
|
||||
|
||||
class NBAWatchCollectionIE(NBAWatchBaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'nba:watch:collection'
|
||||
_VALID_URL = NBAWatchBaseIE._VALID_URL_BASE + r'list/collection/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
@ -336,6 +340,7 @@ class NBABaseIE(NBACVPBaseIE):
|
||||
|
||||
|
||||
class NBAEmbedIE(NBABaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'nba:embed'
|
||||
_VALID_URL = r'https?://secure\.nba\.com/assets/amp/include/video/(?:topI|i)frame\.html\?.*?\bcontentId=(?P<id>[^?#&]+)'
|
||||
_TESTS = [{
|
||||
@ -358,6 +363,7 @@ class NBAEmbedIE(NBABaseIE):
|
||||
|
||||
|
||||
class NBAIE(NBABaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'nba'
|
||||
_VALID_URL = NBABaseIE._VALID_URL_BASE + f'(?!{NBABaseIE._CHANNEL_PATH_REGEX})video/(?P<id>(?:[^/]+/)*[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
@ -385,6 +391,7 @@ class NBAIE(NBABaseIE):
|
||||
|
||||
|
||||
class NBAChannelIE(NBABaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'nba:channel'
|
||||
_VALID_URL = NBABaseIE._VALID_URL_BASE + f'(?:{NBABaseIE._CHANNEL_PATH_REGEX})/(?P<id>[^/?#&]+)'
|
||||
_TESTS = [{
|
||||
|
@ -6,7 +6,7 @@ import xml.etree.ElementTree
|
||||
|
||||
from .adobepass import AdobePassIE
|
||||
from .common import InfoExtractor
|
||||
from .theplatform import ThePlatformIE, default_ns
|
||||
from .theplatform import ThePlatformBaseIE, ThePlatformIE, default_ns
|
||||
from ..networking import HEADRequest
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
@ -14,26 +14,130 @@ from ..utils import (
|
||||
UserNotLive,
|
||||
clean_html,
|
||||
determine_ext,
|
||||
extract_attributes,
|
||||
float_or_none,
|
||||
get_element_html_by_class,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
make_archive_id,
|
||||
mimetype2ext,
|
||||
parse_age_limit,
|
||||
parse_duration,
|
||||
parse_iso8601,
|
||||
remove_end,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unescapeHTML,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/]+/video/[^/]+/(?P<id>(?:NBCE|n)?\d+))'
|
||||
class NBCUniversalBaseIE(ThePlatformBaseIE):
|
||||
_GEO_COUNTRIES = ['US']
|
||||
_GEO_BYPASS = False
|
||||
_M3U8_RE = r'https?://[^/?#]+/prod/[\w-]+/(?P<folders>[^?#]+/)cmaf/mpeg_(?:cbcs|cenc)\w*/master_cmaf\w*\.m3u8'
|
||||
|
||||
def _download_nbcu_smil_and_extract_m3u8_url(self, tp_path, video_id, query):
|
||||
smil = self._download_xml(
|
||||
f'https://link.theplatform.com/s/{tp_path}', video_id,
|
||||
'Downloading SMIL manifest', 'Failed to download SMIL manifest', query={
|
||||
**query,
|
||||
'format': 'SMIL', # XXX: Do not confuse "format" with "formats"
|
||||
'manifest': 'm3u',
|
||||
'switch': 'HLSServiceSecure', # Or else we get broken mp4 http URLs instead of HLS
|
||||
}, headers=self.geo_verification_headers())
|
||||
|
||||
ns = f'//{{{default_ns}}}'
|
||||
if url := traverse_obj(smil, (f'{ns}video/@src', lambda _, v: determine_ext(v) == 'm3u8', any)):
|
||||
return url
|
||||
|
||||
exc = traverse_obj(smil, (f'{ns}param', lambda _, v: v.get('name') == 'exception', '@value', any))
|
||||
if exc == 'GeoLocationBlocked':
|
||||
self.raise_geo_restricted(countries=self._GEO_COUNTRIES)
|
||||
raise ExtractorError(traverse_obj(smil, (f'{ns}ref/@abstract', ..., any)), expected=exc == 'Expired')
|
||||
|
||||
def _extract_nbcu_formats_and_subtitles(self, tp_path, video_id, query):
|
||||
# formats='mpeg4' will return either a working m3u8 URL or an m3u8 template for non-DRM HLS
|
||||
# formats='m3u+none,mpeg4' may return DRM HLS but w/the "folders" needed for non-DRM template
|
||||
query['formats'] = 'm3u+none,mpeg4'
|
||||
m3u8_url = self._download_nbcu_smil_and_extract_m3u8_url(tp_path, video_id, query)
|
||||
|
||||
if mobj := re.fullmatch(self._M3U8_RE, m3u8_url):
|
||||
query['formats'] = 'mpeg4'
|
||||
m3u8_tmpl = self._download_nbcu_smil_and_extract_m3u8_url(tp_path, video_id, query)
|
||||
# Example: https://vod-lf-oneapp-prd.akamaized.net/prod/video/{folders}master_hls.m3u8
|
||||
if '{folders}' in m3u8_tmpl:
|
||||
self.write_debug('Found m3u8 URL template, formatting URL path')
|
||||
m3u8_url = m3u8_tmpl.format(folders=mobj.group('folders'))
|
||||
|
||||
if '/mpeg_cenc' in m3u8_url or '/mpeg_cbcs' in m3u8_url:
|
||||
self.report_drm(video_id)
|
||||
|
||||
return self._extract_m3u8_formats_and_subtitles(m3u8_url, video_id, 'mp4', m3u8_id='hls')
|
||||
|
||||
def _extract_nbcu_video(self, url, display_id, old_ie_key=None):
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
settings = self._search_json(
|
||||
r'<script[^>]+data-drupal-selector="drupal-settings-json"[^>]*>',
|
||||
webpage, 'settings', display_id)
|
||||
|
||||
query = {}
|
||||
tve = extract_attributes(get_element_html_by_class('tve-video-deck-app', webpage) or '')
|
||||
if tve:
|
||||
account_pid = tve.get('data-mpx-media-account-pid') or tve['data-mpx-account-pid']
|
||||
account_id = tve['data-mpx-media-account-id']
|
||||
metadata = self._parse_json(
|
||||
tve.get('data-normalized-video') or '', display_id, fatal=False, transform_source=unescapeHTML)
|
||||
video_id = tve.get('data-guid') or metadata['guid']
|
||||
if tve.get('data-entitlement') == 'auth':
|
||||
auth = settings['tve_adobe_auth']
|
||||
release_pid = tve['data-release-pid']
|
||||
resource = self._get_mvpd_resource(
|
||||
tve.get('data-adobe-pass-resource-id') or auth['adobePassResourceId'],
|
||||
tve['data-title'], release_pid, tve.get('data-rating'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, release_pid, auth['adobePassRequestorId'],
|
||||
resource, auth['adobePassSoftwareStatement'])
|
||||
else:
|
||||
ls_playlist = traverse_obj(settings, (
|
||||
'ls_playlist', lambda _, v: v['defaultGuid'], any, {require('LS playlist')}))
|
||||
video_id = ls_playlist['defaultGuid']
|
||||
account_pid = ls_playlist.get('mpxMediaAccountPid') or ls_playlist['mpxAccountPid']
|
||||
account_id = ls_playlist['mpxMediaAccountId']
|
||||
metadata = traverse_obj(ls_playlist, ('videos', lambda _, v: v['guid'] == video_id, any)) or {}
|
||||
|
||||
tp_path = f'{account_pid}/media/guid/{account_id}/{video_id}'
|
||||
formats, subtitles = self._extract_nbcu_formats_and_subtitles(tp_path, video_id, query)
|
||||
tp_metadata = self._download_theplatform_metadata(tp_path, video_id, fatal=False)
|
||||
parsed_info = self._parse_theplatform_metadata(tp_metadata)
|
||||
self._merge_subtitles(parsed_info['subtitles'], target=subtitles)
|
||||
|
||||
return {
|
||||
**parsed_info,
|
||||
**traverse_obj(metadata, {
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'duration': ('durationInSeconds', {int_or_none}),
|
||||
'timestamp': ('airDate', {parse_iso8601}),
|
||||
'thumbnail': ('thumbnailUrl', {url_or_none}),
|
||||
'season_number': ('seasonNumber', {int_or_none}),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'episode': ('episodeTitle', {str}),
|
||||
'series': ('show', {str}),
|
||||
}),
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'_old_archive_ids': [make_archive_id(old_ie_key, video_id)] if old_ie_key else None,
|
||||
}
|
||||
|
||||
|
||||
class NBCIE(NBCUniversalBaseIE):
|
||||
_VALID_URL = r'https?(?P<permalink>://(?:www\.)?nbc\.com/(?:classic-tv/)?[^/?#]+/video/[^/?#]+/(?P<id>\w+))'
|
||||
_TESTS = [
|
||||
{
|
||||
'url': 'http://www.nbc.com/the-tonight-show/video/jimmy-fallon-surprises-fans-at-ben-jerrys/2848237',
|
||||
@ -49,47 +153,20 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
'episode_number': 86,
|
||||
'season': 'Season 2',
|
||||
'season_number': 2,
|
||||
'series': 'Tonight Show: Jimmy Fallon',
|
||||
'duration': 237.0,
|
||||
'chapters': 'count:1',
|
||||
'tags': 'count:4',
|
||||
'series': 'Tonight',
|
||||
'duration': 236.504,
|
||||
'tags': 'count:2',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'categories': ['Series/The Tonight Show Starring Jimmy Fallon'],
|
||||
'media_type': 'Full Episode',
|
||||
'age_limit': 14,
|
||||
'_old_archive_ids': ['theplatform 2848237'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'http://www.nbc.com/saturday-night-live/video/star-wars-teaser/2832821',
|
||||
'info_dict': {
|
||||
'id': '2832821',
|
||||
'ext': 'mp4',
|
||||
'title': 'Star Wars Teaser',
|
||||
'description': 'md5:0b40f9cbde5b671a7ff62fceccc4f442',
|
||||
'timestamp': 1417852800,
|
||||
'upload_date': '20141206',
|
||||
'uploader': 'NBCU-COM',
|
||||
},
|
||||
'skip': 'page not found',
|
||||
},
|
||||
{
|
||||
# HLS streams requires the 'hdnea3' cookie
|
||||
'url': 'http://www.nbc.com/Kings/video/goliath/n1806',
|
||||
'info_dict': {
|
||||
'id': '101528f5a9e8127b107e98c5e6ce4638',
|
||||
'ext': 'mp4',
|
||||
'title': 'Goliath',
|
||||
'description': 'When an unknown soldier saves the life of the King\'s son in battle, he\'s thrust into the limelight and politics of the kingdom.',
|
||||
'timestamp': 1237100400,
|
||||
'upload_date': '20090315',
|
||||
'uploader': 'NBCU-COM',
|
||||
},
|
||||
'skip': 'page not found',
|
||||
},
|
||||
{
|
||||
# manifest url does not have extension
|
||||
'url': 'https://www.nbc.com/the-golden-globe-awards/video/oprah-winfrey-receives-cecil-b-de-mille-award-at-the-2018-golden-globes/3646439',
|
||||
'info_dict': {
|
||||
'id': '3646439',
|
||||
@ -99,48 +176,47 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
'episode_number': 1,
|
||||
'season': 'Season 75',
|
||||
'season_number': 75,
|
||||
'series': 'The Golden Globe Awards',
|
||||
'series': 'Golden Globes',
|
||||
'description': 'Oprah Winfrey receives the Cecil B. de Mille Award at the 75th Annual Golden Globe Awards.',
|
||||
'uploader': 'NBCU-COM',
|
||||
'upload_date': '20180107',
|
||||
'timestamp': 1515312000,
|
||||
'duration': 570.0,
|
||||
'duration': 569.703,
|
||||
'tags': 'count:8',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'chapters': 'count:1',
|
||||
'media_type': 'Highlight',
|
||||
'age_limit': 0,
|
||||
'categories': ['Series/The Golden Globe Awards'],
|
||||
'_old_archive_ids': ['theplatform 3646439'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
},
|
||||
{
|
||||
# new video_id format
|
||||
'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
|
||||
# Needs to be extracted from webpage instead of GraphQL
|
||||
'url': 'https://www.nbc.com/paris2024/video/ali-truwit-found-purpose-pool-after-her-life-changed/para24_sww_alitruwittodayshow_240823',
|
||||
'info_dict': {
|
||||
'id': 'NBCE125189978',
|
||||
'id': 'para24_sww_alitruwittodayshow_240823',
|
||||
'ext': 'mp4',
|
||||
'title': 'Ben\'s First Leap | NBC\'s Quantum Leap',
|
||||
'description': 'md5:a82762449b7ec4bb83291a7b355ebf8e',
|
||||
'uploader': 'NBCU-COM',
|
||||
'series': 'Quantum Leap',
|
||||
'season': 'Season 1',
|
||||
'season_number': 1,
|
||||
'episode': 'Ben\'s First Leap | NBC\'s Quantum Leap',
|
||||
'episode_number': 1,
|
||||
'duration': 170.171,
|
||||
'chapters': [],
|
||||
'timestamp': 1663956155,
|
||||
'upload_date': '20220923',
|
||||
'tags': 'count:10',
|
||||
'age_limit': 0,
|
||||
'title': 'Ali Truwit found purpose in the pool after her life changed',
|
||||
'description': 'md5:c16d7489e1516593de1cc5d3f39b9bdb',
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
'duration': 311.077,
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'categories': ['Series/Quantum Leap 2022'],
|
||||
'media_type': 'Highlight',
|
||||
'episode': 'Ali Truwit found purpose in the pool after her life changed',
|
||||
'timestamp': 1724435902.0,
|
||||
'upload_date': '20240823',
|
||||
'_old_archive_ids': ['theplatform para24_sww_alitruwittodayshow_240823'],
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
},
|
||||
{
|
||||
'url': 'https://www.nbc.com/quantum-leap/video/bens-first-leap-nbcs-quantum-leap/NBCE125189978',
|
||||
'only_matching': True,
|
||||
},
|
||||
{
|
||||
'url': 'https://www.nbc.com/classic-tv/charles-in-charge/video/charles-in-charge-pilot/n3310',
|
||||
'only_matching': True,
|
||||
@ -151,6 +227,7 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
'only_matching': True,
|
||||
},
|
||||
]
|
||||
_SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiI1Yzg2YjdkYy04NDI3LTRjNDUtOGQwZi1iNDkzYmE3MmQwYjQiLCJuYmYiOjE1Nzg3MDM2MzEsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTc4NzAzNjMxfQ.QQKIsBhAjGQTMdAqRTqhcz2Cddr4Y2hEjnSiOeKKki4nLrkDOsjQMmqeTR0hSRarraxH54wBgLvsxI7LHwKMvr7G8QpynNAxylHlQD3yhN9tFhxt4KR5wW3as02B-W2TznK9bhNWPKIyHND95Uo2Mi6rEQoq8tM9O09WPWaanE5BX_-r6Llr6dPq5F0Lpx2QOn2xYRb1T4nFxdFTNoss8GBds8OvChTiKpXMLHegLTc1OS4H_1a8tO_37jDwSdJuZ8iTyRLV4kZ2cpL6OL5JPMObD4-HQiec_dfcYgMKPiIfP9ZqdXpec2SVaCLsWEk86ZYvD97hLIQrK5rrKd1y-A'
|
||||
|
||||
def _real_extract(self, url):
|
||||
permalink, video_id = self._match_valid_url(url).groups()
|
||||
@ -196,62 +273,50 @@ class NBCIE(ThePlatformIE): # XXX: Do not subclass from concrete IE
|
||||
'userId': '0',
|
||||
}),
|
||||
})['data']['bonanzaPage']['metadata']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
'switch': 'HLSServiceSecure',
|
||||
}
|
||||
|
||||
if not video_data:
|
||||
# Some videos are not available via GraphQL API
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_data = self._search_json(
|
||||
r'<script>\s*PRELOAD\s*=', webpage, 'video data',
|
||||
video_id)['pages'][urllib.parse.urlparse(url).path]['base']['metadata']
|
||||
|
||||
video_id = video_data['mpxGuid']
|
||||
tp_path = 'NnzsPC/media/guid/{}/{}'.format(video_data.get('mpxAccountId') or '2410887629', video_id)
|
||||
tpm = self._download_theplatform_metadata(tp_path, video_id)
|
||||
title = tpm.get('title') or video_data.get('secondaryTitle')
|
||||
tp_path = f'NnzsPC/media/guid/{video_data["mpxAccountId"]}/{video_id}'
|
||||
tpm = self._download_theplatform_metadata(tp_path, video_id, fatal=False)
|
||||
title = traverse_obj(tpm, ('title', {str})) or video_data.get('secondaryTitle')
|
||||
query = {}
|
||||
if video_data.get('locked'):
|
||||
resource = self._get_mvpd_resource(
|
||||
video_data.get('resourceId') or 'nbcentertainment',
|
||||
title, video_id, video_data.get('rating'))
|
||||
video_data['resourceId'], title, video_id, video_data.get('rating'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, 'nbcentertainment', resource)
|
||||
theplatform_url = smuggle_url(update_url_query(
|
||||
'http://link.theplatform.com/s/NnzsPC/media/guid/{}/{}'.format(video_data.get('mpxAccountId') or '2410887629', video_id),
|
||||
query), {'force_smil_url': True})
|
||||
url, video_id, 'nbcentertainment', resource, self._SOFTWARE_STATEMENT)
|
||||
|
||||
# Empty string or 0 can be valid values for these. So the check must be `is None`
|
||||
description = video_data.get('description')
|
||||
if description is None:
|
||||
description = tpm.get('description')
|
||||
episode_number = int_or_none(video_data.get('episodeNumber'))
|
||||
if episode_number is None:
|
||||
episode_number = int_or_none(tpm.get('nbcu$airOrder'))
|
||||
rating = video_data.get('rating')
|
||||
if rating is None:
|
||||
try_get(tpm, lambda x: x['ratings'][0]['rating'])
|
||||
season_number = int_or_none(video_data.get('seasonNumber'))
|
||||
if season_number is None:
|
||||
season_number = int_or_none(tpm.get('nbcu$seasonNumber'))
|
||||
series = video_data.get('seriesShortTitle')
|
||||
if series is None:
|
||||
series = tpm.get('nbcu$seriesShortTitle')
|
||||
tags = video_data.get('keywords')
|
||||
if tags is None or len(tags) == 0:
|
||||
tags = tpm.get('keywords')
|
||||
formats, subtitles = self._extract_nbcu_formats_and_subtitles(tp_path, video_id, query)
|
||||
parsed_info = self._parse_theplatform_metadata(tpm)
|
||||
self._merge_subtitles(parsed_info['subtitles'], target=subtitles)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'age_limit': parse_age_limit(rating),
|
||||
'description': description,
|
||||
'episode': title,
|
||||
'episode_number': episode_number,
|
||||
**traverse_obj(video_data, {
|
||||
'description': ('description', {str}, filter),
|
||||
'episode': ('secondaryTitle', {str}, filter),
|
||||
'episode_number': ('episodeNumber', {int_or_none}),
|
||||
'season_number': ('seasonNumber', {int_or_none}),
|
||||
'age_limit': ('rating', {parse_age_limit}),
|
||||
'tags': ('keywords', ..., {str}, filter, all, filter),
|
||||
'series': ('seriesShortTitle', {str}),
|
||||
}),
|
||||
**parsed_info,
|
||||
'id': video_id,
|
||||
'ie_key': 'ThePlatform',
|
||||
'season_number': season_number,
|
||||
'series': series,
|
||||
'tags': tags,
|
||||
'title': title,
|
||||
'url': theplatform_url,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'_old_archive_ids': [make_archive_id('ThePlatform', video_id)],
|
||||
}
|
||||
|
||||
|
||||
class NBCSportsVPlayerIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL_BASE = r'https?://(?:vplayer\.nbcsports\.com|(?:www\.)?nbcsports\.com/vplayer)/'
|
||||
_VALID_URL = _VALID_URL_BASE + r'(?:[^/]+/)+(?P<id>[0-9a-zA-Z_]+)'
|
||||
_EMBED_REGEX = [rf'(?:iframe[^>]+|var video|div[^>]+data-(?:mpx-)?)[sS]rc\s?=\s?"(?P<url>{_VALID_URL_BASE}[^\"]+)']
|
||||
@ -286,6 +351,7 @@ class NBCSportsVPlayerIE(InfoExtractor):
|
||||
|
||||
|
||||
class NBCSportsIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?nbcsports\.com//?(?!vplayer/)(?:[^/]+/)+(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TESTS = [{
|
||||
@ -321,6 +387,7 @@ class NBCSportsIE(InfoExtractor):
|
||||
|
||||
|
||||
class NBCSportsStreamIE(AdobePassIE):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://stream\.nbcsports\.com/.+?\bpid=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://stream.nbcsports.com/nbcsn/generic?pid=206559',
|
||||
@ -354,7 +421,7 @@ class NBCSportsStreamIE(AdobePassIE):
|
||||
source_url = video_source['ottStreamUrl']
|
||||
is_live = video_source.get('type') == 'live' or video_source.get('status') == 'Live'
|
||||
resource = self._get_mvpd_resource('nbcsports', title, video_id, '')
|
||||
token = self._extract_mvpd_auth(url, video_id, 'nbcsports', resource)
|
||||
token = self._extract_mvpd_auth(url, video_id, 'nbcsports', resource, None) # XXX: None arg needs to be software_statement
|
||||
tokenized_url = self._download_json(
|
||||
'https://token.playmakerservices.com/cdn',
|
||||
video_id, data=json.dumps({
|
||||
@ -534,22 +601,26 @@ class NBCOlympicsIE(InfoExtractor):
|
||||
IE_NAME = 'nbcolympics'
|
||||
_VALID_URL = r'https?://www\.nbcolympics\.com/videos?/(?P<id>[0-9a-z-]+)'
|
||||
|
||||
_TEST = {
|
||||
_TESTS = [{
|
||||
# Geo-restricted to US
|
||||
'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
|
||||
'md5': '54fecf846d05429fbaa18af557ee523a',
|
||||
'url': 'https://www.nbcolympics.com/videos/watch-final-minutes-team-usas-mens-basketball-gold',
|
||||
'info_dict': {
|
||||
'id': 'WjTBzDXx5AUq',
|
||||
'display_id': 'justin-roses-son-leo-was-tears-after-his-dad-won-gold',
|
||||
'id': 'SAwGfPlQ1q01',
|
||||
'ext': 'mp4',
|
||||
'title': 'Rose\'s son Leo was in tears after his dad won gold',
|
||||
'description': 'Olympic gold medalist Justin Rose gets emotional talking to the impact his win in men\'s golf has already had on his children.',
|
||||
'timestamp': 1471274964,
|
||||
'upload_date': '20160815',
|
||||
'display_id': 'watch-final-minutes-team-usas-mens-basketball-gold',
|
||||
'title': 'Watch the final minutes of Team USA\'s men\'s basketball gold',
|
||||
'description': 'md5:f704f591217305c9559b23b877aa8d31',
|
||||
'uploader': 'NBCU-SPORTS',
|
||||
'duration': 387.053,
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'chapters': [],
|
||||
'timestamp': 1723346984,
|
||||
'upload_date': '20240811',
|
||||
},
|
||||
'skip': '404 Not Found',
|
||||
}
|
||||
}, {
|
||||
'url': 'http://www.nbcolympics.com/video/justin-roses-son-leo-was-tears-after-his-dad-won-gold',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
@ -578,6 +649,7 @@ class NBCOlympicsIE(InfoExtractor):
|
||||
|
||||
|
||||
class NBCOlympicsStreamIE(AdobePassIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'nbcolympics:stream'
|
||||
_VALID_URL = r'https?://stream\.nbcolympics\.com/(?P<id>[0-9a-z-]+)'
|
||||
_TESTS = [
|
||||
@ -630,7 +702,8 @@ class NBCOlympicsStreamIE(AdobePassIE):
|
||||
event_config.get('resourceId', 'NBCOlympics'),
|
||||
re.sub(r'[^\w\d ]+', '', event_config['eventTitle']), pid,
|
||||
event_config.get('ratingId', 'NO VALUE'))
|
||||
media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource)
|
||||
# XXX: The None arg below needs to be the software_statement for this requestor
|
||||
media_token = self._extract_mvpd_auth(url, pid, event_config.get('requestorId', 'NBCOlympics'), ap_resource, None)
|
||||
|
||||
source_url = self._download_json(
|
||||
'https://tokens.playmakerservices.com/', pid, 'Retrieving tokenized URL',
|
||||
@ -848,3 +921,178 @@ class NBCStationsIE(InfoExtractor):
|
||||
'is_live': is_live,
|
||||
**info,
|
||||
}
|
||||
|
||||
|
||||
class BravoTVIE(NBCUniversalBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?(?:bravotv|oxygen)\.com/(?:[^/?#]+/)+(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.bravotv.com/top-chef/season-16/episode-15/videos/the-top-chef-season-16-winner-is',
|
||||
'info_dict': {
|
||||
'id': '3923059',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Top Chef Season 16 Winner Is...',
|
||||
'display_id': 'the-top-chef-season-16-winner-is',
|
||||
'description': 'Find out who takes the title of Top Chef!',
|
||||
'upload_date': '20190315',
|
||||
'timestamp': 1552618860,
|
||||
'season_number': 16,
|
||||
'episode_number': 15,
|
||||
'series': 'Top Chef',
|
||||
'episode': 'Finale',
|
||||
'duration': 190,
|
||||
'season': 'Season 16',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'uploader': 'NBCU-BRAV',
|
||||
'categories': ['Series', 'Series/Top Chef'],
|
||||
'tags': 'count:10',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.bravotv.com/top-chef/season-20/episode-1/london-calling',
|
||||
'info_dict': {
|
||||
'id': '9000234570',
|
||||
'ext': 'mp4',
|
||||
'title': 'London Calling',
|
||||
'display_id': 'london-calling',
|
||||
'description': 'md5:5af95a8cbac1856bd10e7562f86bb759',
|
||||
'upload_date': '20230310',
|
||||
'timestamp': 1678418100,
|
||||
'season_number': 20,
|
||||
'episode_number': 1,
|
||||
'series': 'Top Chef',
|
||||
'episode': 'London Calling',
|
||||
'duration': 3266,
|
||||
'season': 'Season 20',
|
||||
'chapters': 'count:7',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'age_limit': 14,
|
||||
'media_type': 'Full Episode',
|
||||
'uploader': 'NBCU-MPAT',
|
||||
'categories': ['Series/Top Chef'],
|
||||
'tags': 'count:10',
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-1/closing-night',
|
||||
'info_dict': {
|
||||
'id': '3692045',
|
||||
'ext': 'mp4',
|
||||
'title': 'Closing Night',
|
||||
'display_id': 'closing-night',
|
||||
'description': 'md5:c8a5bb523c8ef381f3328c6d9f1e4632',
|
||||
'upload_date': '20230126',
|
||||
'timestamp': 1674709200,
|
||||
'season_number': 1,
|
||||
'episode_number': 1,
|
||||
'series': 'In Ice Cold Blood',
|
||||
'episode': 'Closing Night',
|
||||
'duration': 2629,
|
||||
'season': 'Season 1',
|
||||
'chapters': 'count:6',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'age_limit': 14,
|
||||
'media_type': 'Full Episode',
|
||||
'uploader': 'NBCU-MPAT',
|
||||
'categories': ['Series/In Ice Cold Blood'],
|
||||
'tags': ['ice-t', 'in ice cold blood', 'law and order', 'oxygen', 'true crime'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}, {
|
||||
'url': 'https://www.oxygen.com/in-ice-cold-blood/season-2/episode-16/videos/handling-the-horwitz-house-after-the-murder-season-2',
|
||||
'info_dict': {
|
||||
'id': '3974019',
|
||||
'ext': 'mp4',
|
||||
'title': '\'Handling The Horwitz House After The Murder (Season 2, Episode 16)',
|
||||
'display_id': 'handling-the-horwitz-house-after-the-murder-season-2',
|
||||
'description': 'md5:f9d638dd6946a1c1c0533a9c6100eae5',
|
||||
'upload_date': '20190618',
|
||||
'timestamp': 1560819600,
|
||||
'season_number': 2,
|
||||
'episode_number': 16,
|
||||
'series': 'In Ice Cold Blood',
|
||||
'episode': 'Mother Vs Son',
|
||||
'duration': 68,
|
||||
'season': 'Season 2',
|
||||
'thumbnail': r're:^https://.+\.jpg',
|
||||
'age_limit': 14,
|
||||
'uploader': 'NBCU-OXY',
|
||||
'categories': ['Series/In Ice Cold Blood'],
|
||||
'tags': ['in ice cold blood', 'ice-t', 'law and order', 'true crime', 'oxygen'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.bravotv.com/below-deck/season-3/ep-14-reunion-part-1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._extract_nbcu_video(url, display_id)
|
||||
|
||||
|
||||
class SyfyIE(NBCUniversalBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?syfy\.com/[^/?#]+/(?:season-\d+/episode-\d+/(?:videos/)?|videos/)(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.syfy.com/face-off/season-13/episode-10/videos/keyed-up',
|
||||
'info_dict': {
|
||||
'id': '3774403',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'keyed-up',
|
||||
'title': 'Keyed Up',
|
||||
'description': 'md5:feafd15bee449f212dcd3065bbe9a755',
|
||||
'age_limit': 14,
|
||||
'duration': 169,
|
||||
'thumbnail': r're:https://www\.syfy\.com/.+/.+\.jpg',
|
||||
'series': 'Face Off',
|
||||
'season': 'Season 13',
|
||||
'season_number': 13,
|
||||
'episode': 'Through the Looking Glass Part 2',
|
||||
'episode_number': 10,
|
||||
'timestamp': 1533711618,
|
||||
'upload_date': '20180808',
|
||||
'media_type': 'Excerpt',
|
||||
'uploader': 'NBCU-MPAT',
|
||||
'categories': ['Series/Face Off'],
|
||||
'tags': 'count:15',
|
||||
'_old_archive_ids': ['theplatform 3774403'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
'url': 'https://www.syfy.com/face-off/season-13/episode-10/through-the-looking-glass-part-2',
|
||||
'info_dict': {
|
||||
'id': '3772391',
|
||||
'ext': 'mp4',
|
||||
'display_id': 'through-the-looking-glass-part-2',
|
||||
'title': 'Through the Looking Glass Pt.2',
|
||||
'description': 'md5:90bd5dcbf1059fe3296c263599af41d2',
|
||||
'age_limit': 0,
|
||||
'duration': 2599,
|
||||
'thumbnail': r're:https://www\.syfy\.com/.+/.+\.jpg',
|
||||
'chapters': [{'start_time': 0.0, 'end_time': 679.0, 'title': '<Untitled Chapter 1>'},
|
||||
{'start_time': 679.0, 'end_time': 1040.967, 'title': '<Untitled Chapter 2>'},
|
||||
{'start_time': 1040.967, 'end_time': 1403.0, 'title': '<Untitled Chapter 3>'},
|
||||
{'start_time': 1403.0, 'end_time': 1870.0, 'title': '<Untitled Chapter 4>'},
|
||||
{'start_time': 1870.0, 'end_time': 2496.967, 'title': '<Untitled Chapter 5>'},
|
||||
{'start_time': 2496.967, 'end_time': 2599, 'title': '<Untitled Chapter 6>'}],
|
||||
'series': 'Face Off',
|
||||
'season': 'Season 13',
|
||||
'season_number': 13,
|
||||
'episode': 'Through the Looking Glass Part 2',
|
||||
'episode_number': 10,
|
||||
'timestamp': 1672570800,
|
||||
'upload_date': '20230101',
|
||||
'media_type': 'Full Episode',
|
||||
'uploader': 'NBCU-MPAT',
|
||||
'categories': ['Series/Face Off'],
|
||||
'tags': 'count:15',
|
||||
'_old_archive_ids': ['theplatform 3772391'],
|
||||
},
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'skip': 'This video requires AdobePass MSO credentials',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
return self._extract_nbcu_video(url, display_id, old_ie_key='ThePlatform')
|
||||
|
@ -1,59 +1,57 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
determine_ext,
|
||||
get_element_by_attribute,
|
||||
UnsupportedError,
|
||||
clean_html,
|
||||
int_or_none,
|
||||
js_to_json,
|
||||
mimetype2ext,
|
||||
update_url_query,
|
||||
parse_duration,
|
||||
parse_qs,
|
||||
str_or_none,
|
||||
update_url,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class NobelPrizeIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer.*?\bid=(?P<id>\d+)'
|
||||
_TEST = {
|
||||
'url': 'http://www.nobelprize.org/mediaplayer/?id=2636',
|
||||
'md5': '04c81e5714bb36cc4e2232fee1d8157f',
|
||||
_VALID_URL = r'https?://(?:(?:mediaplayer|www)\.)?nobelprize\.org/mediaplayer/'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.nobelprize.org/mediaplayer/?id=2636',
|
||||
'info_dict': {
|
||||
'id': '2636',
|
||||
'ext': 'mp4',
|
||||
'title': 'Announcement of the 2016 Nobel Prize in Physics',
|
||||
'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739',
|
||||
'description': 'md5:1a2d8a6ca80c88fb3b9a326e0b0e8e43',
|
||||
'duration': 1560.0,
|
||||
'thumbnail': r're:https?://www\.nobelprize\.org/images/.+\.jpg',
|
||||
'timestamp': 1504883793,
|
||||
'upload_date': '20170908',
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://mediaplayer.nobelprize.org/mediaplayer/?qid=12693',
|
||||
'info_dict': {
|
||||
'id': '12693',
|
||||
'ext': 'mp4',
|
||||
'title': 'Nobel Lecture by Peter Higgs',
|
||||
'description': 'md5:9b12e275dbe3a8138484e70e00673a05',
|
||||
'duration': 1800.0,
|
||||
'thumbnail': r're:https?://www\.nobelprize\.org/images/.+\.jpg',
|
||||
'timestamp': 1504883793,
|
||||
'upload_date': '20170908',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
media = self._parse_json(self._search_regex(
|
||||
r'(?s)var\s*config\s*=\s*({.+?});', webpage,
|
||||
'config'), video_id, js_to_json)['media']
|
||||
title = media['title']
|
||||
|
||||
formats = []
|
||||
for source in media.get('source', []):
|
||||
source_src = source.get('src')
|
||||
if not source_src:
|
||||
continue
|
||||
ext = mimetype2ext(source.get('type')) or determine_ext(source_src)
|
||||
if ext == 'm3u8':
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
source_src, video_id, 'mp4', 'm3u8_native',
|
||||
m3u8_id='hls', fatal=False))
|
||||
elif ext == 'f4m':
|
||||
formats.extend(self._extract_f4m_formats(
|
||||
update_url_query(source_src, {'hdcore': '3.7.0'}),
|
||||
video_id, f4m_id='hds', fatal=False))
|
||||
else:
|
||||
formats.append({
|
||||
'url': source_src,
|
||||
})
|
||||
video_id = traverse_obj(parse_qs(url), (
|
||||
('id', 'qid'), -1, {int_or_none}, {str_or_none}, any))
|
||||
if not video_id:
|
||||
raise UnsupportedError(url)
|
||||
webpage = self._download_webpage(
|
||||
update_url(url, netloc='mediaplayer.nobelprize.org'), video_id)
|
||||
|
||||
return {
|
||||
**self._search_json_ld(webpage, video_id),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'description': get_element_by_attribute('itemprop', 'description', webpage),
|
||||
'duration': int_or_none(media.get('duration')),
|
||||
'formats': formats,
|
||||
'title': self._html_search_meta('caption', webpage),
|
||||
'description': traverse_obj(webpage, (
|
||||
{find_element(tag='span', attr='itemprop', value='description')}, {clean_html})),
|
||||
'duration': parse_duration(self._html_search_meta('duration', webpage)),
|
||||
}
|
||||
|
@ -1,55 +1,82 @@
|
||||
from .common import InfoExtractor
|
||||
from .streaks import StreaksBaseIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
smuggle_url,
|
||||
traverse_obj,
|
||||
int_or_none,
|
||||
parse_iso8601,
|
||||
str_or_none,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class NTVCoJpCUIE(InfoExtractor):
|
||||
class NTVCoJpCUIE(StreaksBaseIE):
|
||||
IE_NAME = 'cu.ntv.co.jp'
|
||||
IE_DESC = 'Nippon Television Network'
|
||||
_VALID_URL = r'https?://cu\.ntv\.co\.jp/(?!program)(?P<id>[^/?&#]+)'
|
||||
_TEST = {
|
||||
'url': 'https://cu.ntv.co.jp/televiva-chill-gohan_181031/',
|
||||
IE_DESC = '日テレ無料TADA!'
|
||||
_VALID_URL = r'https?://cu\.ntv\.co\.jp/(?!program-list|search)(?P<id>[\w-]+)/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://cu.ntv.co.jp/gaki_20250525/',
|
||||
'info_dict': {
|
||||
'id': '5978891207001',
|
||||
'id': 'gaki_20250525',
|
||||
'ext': 'mp4',
|
||||
'title': '桜エビと炒り卵がポイント! 「中華風 エビチリおにぎり」──『美虎』五十嵐美幸',
|
||||
'upload_date': '20181213',
|
||||
'description': 'md5:1985b51a9abc285df0104d982a325f2a',
|
||||
'uploader_id': '3855502814001',
|
||||
'timestamp': 1544669941,
|
||||
'title': '放送開始36年!方正ココリコが選ぶ神回&地獄回!',
|
||||
'cast': 'count:2',
|
||||
'description': 'md5:1e1db556224d627d4d2f74370c650927',
|
||||
'display_id': 'ref:gaki_20250525',
|
||||
'duration': 1450,
|
||||
'episode': '放送開始36年!方正ココリコが選ぶ神回&地獄回!',
|
||||
'episode_id': '000000010172808',
|
||||
'episode_number': 255,
|
||||
'genres': ['variety'],
|
||||
'live_status': 'not_live',
|
||||
'modified_date': '20250525',
|
||||
'modified_timestamp': 1748145537,
|
||||
'release_date': '20250525',
|
||||
'release_timestamp': 1748145539,
|
||||
'series': 'ダウンタウンのガキの使いやあらへんで!',
|
||||
'series_id': 'gaki',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
'timestamp': 1748145197,
|
||||
'upload_date': '20250525',
|
||||
'uploader': '日本テレビ放送網',
|
||||
'uploader_id': '0x7FE2',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
|
||||
BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
player_config = self._search_nuxt_data(webpage, display_id)
|
||||
video_id = traverse_obj(player_config, ('movie', 'video_id'))
|
||||
if not video_id:
|
||||
raise ExtractorError('Failed to extract video ID for Brightcove')
|
||||
account_id = traverse_obj(player_config, ('player', 'account')) or '3855502814001'
|
||||
title = traverse_obj(player_config, ('movie', 'name'))
|
||||
if not title:
|
||||
og_title = self._og_search_title(webpage, fatal=False) or traverse_obj(player_config, ('player', 'title'))
|
||||
if og_title:
|
||||
title = og_title.split('(', 1)[0].strip()
|
||||
description = (traverse_obj(player_config, ('movie', 'description'))
|
||||
or self._html_search_meta(['description', 'og:description'], webpage))
|
||||
|
||||
info = self._search_json(
|
||||
r'window\.app\s*=', webpage, 'video info',
|
||||
display_id)['falcorCache']['catalog']['episode'][display_id]['value']
|
||||
media_id = traverse_obj(info, (
|
||||
'streaks_data', 'mediaid', {str_or_none}, {require('Streaks media ID')}))
|
||||
non_phonetic = (lambda _, v: v['is_phonetic'] is False, 'value', {str})
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
'title': title,
|
||||
'description': description,
|
||||
'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), {'geo_countries': ['JP']}),
|
||||
'ie_key': 'BrightcoveNew',
|
||||
**self._extract_from_streaks_api('ntv-tada', media_id, headers={
|
||||
'X-Streaks-Api-Key': 'df497719056b44059a0483b8faad1f4a',
|
||||
}),
|
||||
**traverse_obj(info, {
|
||||
'id': ('content_id', {str_or_none}),
|
||||
'title': ('title', *non_phonetic, any),
|
||||
'age_limit': ('is_adult_only_content', {lambda x: 18 if x else None}),
|
||||
'cast': ('credit', ..., 'name', *non_phonetic),
|
||||
'genres': ('genre', ..., {str}),
|
||||
'release_timestamp': ('pub_date', {parse_iso8601}),
|
||||
'tags': ('tags', ..., {str}),
|
||||
'thumbnail': ('artwork', ..., 'url', any, {url_or_none}),
|
||||
}),
|
||||
**traverse_obj(info, ('tv_episode_info', {
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'episode_number': ('episode_number', {int}),
|
||||
'series': ('parent_show_title', *non_phonetic, any),
|
||||
'series_id': ('show_content_id', {str}),
|
||||
})),
|
||||
**traverse_obj(info, ('custom_data', {
|
||||
'description': ('program_detail', {str}),
|
||||
'episode': ('episode_title', {str}),
|
||||
'episode_id': ('episode_id', {str_or_none}),
|
||||
'uploader': ('network_name', {str}),
|
||||
'uploader_id': ('network_id', {str}),
|
||||
})),
|
||||
}
|
||||
|
@ -273,6 +273,8 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
return self._extract_desktop(smuggle_url(url, {'referrer': 'https://boosty.to'}))
|
||||
elif error:
|
||||
raise ExtractorError(error, expected=True)
|
||||
elif '>Access to this video is restricted</div>' in webpage:
|
||||
self.raise_login_required()
|
||||
|
||||
player = self._parse_json(
|
||||
unescapeHTML(self._search_regex(
|
||||
@ -429,7 +431,7 @@ class OdnoklassnikiIE(InfoExtractor):
|
||||
video_id = self._match_id(url)
|
||||
|
||||
webpage = self._download_webpage(
|
||||
f'http://m.ok.ru/video/{video_id}', video_id,
|
||||
f'https://m.ok.ru/video/{video_id}', video_id,
|
||||
note='Downloading mobile webpage')
|
||||
|
||||
error = self._search_regex(
|
||||
|
@ -340,8 +340,9 @@ class PatreonIE(PatreonBaseIE):
|
||||
'channel_follower_count': ('attributes', 'patron_count', {int_or_none}),
|
||||
}))
|
||||
|
||||
# all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, Vimeo
|
||||
headers = {'referer': 'https://patreon.com/'}
|
||||
# Must be all-lowercase 'referer' so we can smuggle it to Generic, SproutVideo, and Vimeo.
|
||||
# patreon.com URLs redirect to www.patreon.com; this matters when requesting mux.com m3u8s
|
||||
headers = {'referer': 'https://www.patreon.com/'}
|
||||
|
||||
# handle Vimeo embeds
|
||||
if traverse_obj(attributes, ('embed', 'provider')) == 'Vimeo':
|
||||
@ -352,7 +353,7 @@ class PatreonIE(PatreonBaseIE):
|
||||
v_url, video_id, 'Checking Vimeo embed URL', headers=headers,
|
||||
fatal=False, errnote=False, expected_status=429): # 429 is TLS fingerprint rejection
|
||||
entries.append(self.url_result(
|
||||
VimeoIE._smuggle_referrer(v_url, 'https://patreon.com/'),
|
||||
VimeoIE._smuggle_referrer(v_url, headers['referer']),
|
||||
VimeoIE, url_transparent=True))
|
||||
|
||||
embed_url = traverse_obj(attributes, ('embed', 'url', {url_or_none}))
|
||||
@ -379,11 +380,13 @@ class PatreonIE(PatreonBaseIE):
|
||||
'url': post_file['url'],
|
||||
})
|
||||
elif name == 'video' or determine_ext(post_file.get('url')) == 'm3u8':
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(post_file['url'], video_id)
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(
|
||||
post_file['url'], video_id, headers=headers)
|
||||
entries.append({
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'http_headers': headers,
|
||||
})
|
||||
|
||||
can_view_post = traverse_obj(attributes, 'current_user_can_view')
|
||||
|
@ -5,11 +5,13 @@ from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
OnDemandPagedList,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
orderedSet,
|
||||
str_or_none,
|
||||
str_to_int,
|
||||
traverse_obj,
|
||||
unified_timestamp,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class PodchaserIE(InfoExtractor):
|
||||
@ -21,24 +23,25 @@ class PodchaserIE(InfoExtractor):
|
||||
'id': '104365585',
|
||||
'title': 'Ep. 285 – freeze me off',
|
||||
'description': 'cam ahn',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
'thumbnail': r're:https?://.+/.+\.jpg',
|
||||
'ext': 'mp3',
|
||||
'categories': ['Comedy'],
|
||||
'categories': ['Comedy', 'News', 'Politics', 'Arts'],
|
||||
'tags': ['comedy', 'dark humor'],
|
||||
'series': 'Cum Town',
|
||||
'series': 'The Adam Friedland Show Podcast',
|
||||
'duration': 3708,
|
||||
'timestamp': 1636531259,
|
||||
'upload_date': '20211110',
|
||||
'average_rating': 4.0,
|
||||
'series_id': '36924',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.podchaser.com/podcasts/the-bone-zone-28853',
|
||||
'info_dict': {
|
||||
'id': '28853',
|
||||
'title': 'The Bone Zone',
|
||||
'description': 'Podcast by The Bone Zone',
|
||||
'description': r're:The official home of the Bone Zone podcast.+',
|
||||
},
|
||||
'playlist_count': 275,
|
||||
'playlist_mincount': 275,
|
||||
}, {
|
||||
'url': 'https://www.podchaser.com/podcasts/sean-carrolls-mindscape-scienc-699349/episodes',
|
||||
'info_dict': {
|
||||
@ -51,19 +54,33 @@ class PodchaserIE(InfoExtractor):
|
||||
|
||||
@staticmethod
|
||||
def _parse_episode(episode, podcast):
|
||||
return {
|
||||
'id': str(episode.get('id')),
|
||||
'title': episode.get('title'),
|
||||
'description': episode.get('description'),
|
||||
'url': episode.get('audio_url'),
|
||||
'thumbnail': episode.get('image_url'),
|
||||
'duration': str_to_int(episode.get('length')),
|
||||
'timestamp': unified_timestamp(episode.get('air_date')),
|
||||
'average_rating': float_or_none(episode.get('rating')),
|
||||
'categories': list(set(traverse_obj(podcast, (('summary', None), 'categories', ..., 'text')))),
|
||||
'tags': traverse_obj(podcast, ('tags', ..., 'text')),
|
||||
'series': podcast.get('title'),
|
||||
}
|
||||
info = traverse_obj(episode, {
|
||||
'id': ('id', {int}, {str_or_none}, {require('episode ID')}),
|
||||
'title': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'url': ('audio_url', {url_or_none}),
|
||||
'thumbnail': ('image_url', {url_or_none}),
|
||||
'duration': ('length', {int_or_none}),
|
||||
'timestamp': ('air_date', {unified_timestamp}),
|
||||
'average_rating': ('rating', {float_or_none}),
|
||||
})
|
||||
info.update(traverse_obj(podcast, {
|
||||
'series': ('title', {str}),
|
||||
'series_id': ('id', {int}, {str_or_none}),
|
||||
'categories': (('summary', None), 'categories', ..., 'text', {str}, filter, all, {orderedSet}),
|
||||
'tags': ('tags', ..., 'text', {str}),
|
||||
}))
|
||||
info['vcodec'] = 'none'
|
||||
|
||||
if info.get('series_id'):
|
||||
podcast_slug = traverse_obj(podcast, ('slug', {str})) or 'podcast'
|
||||
episode_slug = traverse_obj(episode, ('slug', {str})) or 'episode'
|
||||
info['webpage_url'] = '/'.join((
|
||||
'https://www.podchaser.com/podcasts',
|
||||
'-'.join((podcast_slug[:30].rstrip('-'), info['series_id'])),
|
||||
'-'.join((episode_slug[:30].rstrip('-'), info['id']))))
|
||||
|
||||
return info
|
||||
|
||||
def _call_api(self, path, *args, **kwargs):
|
||||
return self._download_json(f'https://api.podchaser.com/{path}', *args, **kwargs)
|
||||
@ -93,5 +110,5 @@ class PodchaserIE(InfoExtractor):
|
||||
OnDemandPagedList(functools.partial(self._fetch_page, podcast_id, podcast), self._PAGE_SIZE),
|
||||
str_or_none(podcast.get('id')), podcast.get('title'), podcast.get('description'))
|
||||
|
||||
episode = self._call_api(f'episodes/{episode_id}', episode_id)
|
||||
episode = self._call_api(f'podcasts/{podcast_id}/episodes/{episode_id}/player_ids', episode_id)
|
||||
return self._parse_episode(episode, podcast)
|
||||
|
@ -15,7 +15,6 @@ from ..utils import (
|
||||
str_or_none,
|
||||
strip_jsonp,
|
||||
traverse_obj,
|
||||
unescapeHTML,
|
||||
url_or_none,
|
||||
urljoin,
|
||||
)
|
||||
@ -425,7 +424,7 @@ class QQMusicPlaylistIE(QQPlaylistBaseIE):
|
||||
|
||||
return self.playlist_result(entries, list_id, **traverse_obj(list_json, ('cdlist', 0, {
|
||||
'title': ('dissname', {str}),
|
||||
'description': ('desc', {unescapeHTML}, {clean_html}),
|
||||
'description': ('desc', {clean_html}),
|
||||
})))
|
||||
|
||||
|
||||
|
@ -1,57 +1,102 @@
|
||||
from .ard import ARDMediathekBaseIE
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
get_element_by_attribute,
|
||||
clean_html,
|
||||
extract_attributes,
|
||||
parse_duration,
|
||||
parse_qs,
|
||||
unified_strdate,
|
||||
)
|
||||
from ..utils.traversal import (
|
||||
find_element,
|
||||
require,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class SRMediathekIE(ARDMediathekBaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'sr:mediathek'
|
||||
IE_DESC = 'Saarländischer Rundfunk'
|
||||
_VALID_URL = r'https?://sr-mediathek(?:\.sr-online)?\.de/index\.php\?.*?&id=(?P<id>[0-9]+)'
|
||||
|
||||
_CLS_COMMON = 'teaser__image__caption__text teaser__image__caption__text--'
|
||||
_VALID_URL = r'https?://(?:www\.)?sr-mediathek\.de/index\.php\?.*?&id=(?P<id>\d+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=28455',
|
||||
'url': 'https://www.sr-mediathek.de/index.php?seite=7&id=141317',
|
||||
'info_dict': {
|
||||
'id': '28455',
|
||||
'id': '141317',
|
||||
'ext': 'mp4',
|
||||
'title': 'sportarena (26.10.2014)',
|
||||
'description': 'Ringen: KSV Köllerbach gegen Aachen-Walheim; Frauen-Fußball: 1. FC Saarbrücken gegen Sindelfingen; Motorsport: Rallye in Losheim; dazu: Interview mit Timo Bernhard; Turnen: TG Saar; Reitsport: Deutscher Voltigier-Pokal; Badminton: Interview mit Michael Fuchs ',
|
||||
'thumbnail': r're:^https?://.*\.jpg$',
|
||||
},
|
||||
'skip': 'no longer available',
|
||||
}, {
|
||||
'url': 'http://sr-mediathek.sr-online.de/index.php?seite=7&id=37682',
|
||||
'info_dict': {
|
||||
'id': '37682',
|
||||
'ext': 'mp4',
|
||||
'title': 'Love, Cakes and Rock\'n\'Roll',
|
||||
'description': 'md5:18bf9763631c7d326c22603681e1123d',
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
'title': 'Kärnten, da will ich hin!',
|
||||
'channel': 'SR Fernsehen',
|
||||
'description': 'md5:7732e71e803379a499732864a572a456',
|
||||
'duration': 1788.0,
|
||||
'release_date': '20250525',
|
||||
'series': 'da will ich hin!',
|
||||
'series_id': 'DWIH',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'http://sr-mediathek.de/index.php?seite=7&id=7480',
|
||||
'only_matching': True,
|
||||
'url': 'https://www.sr-mediathek.de/index.php?seite=7&id=153853',
|
||||
'info_dict': {
|
||||
'id': '153853',
|
||||
'ext': 'mp3',
|
||||
'title': 'Kappes, Klöße, Kokosmilch: Bruschetta mit Nduja',
|
||||
'channel': 'SR 3',
|
||||
'description': 'md5:3935798de3562b10c4070b408a15e225',
|
||||
'duration': 139.0,
|
||||
'release_date': '20250523',
|
||||
'series': 'Kappes, Klöße, Kokosmilch',
|
||||
'series_id': 'SR3_KKK_A',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.sr-mediathek.de/index.php?seite=7&id=31406&pnr=&tbl=pf',
|
||||
'info_dict': {
|
||||
'id': '31406',
|
||||
'ext': 'mp3',
|
||||
'title': 'Das Leben schwer nehmen, ist einfach zu anstrengend',
|
||||
'channel': 'SR 1',
|
||||
'description': 'md5:3e03fd556af831ad984d0add7175fb0c',
|
||||
'duration': 1769.0,
|
||||
'release_date': '20230717',
|
||||
'series': 'Abendrot',
|
||||
'series_id': 'SR1_AB_P',
|
||||
'thumbnail': r're:https?://.+\.jpg',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
description = self._og_search_description(webpage)
|
||||
|
||||
if '>Der gewünschte Beitrag ist leider nicht mehr verfügbar.<' in webpage:
|
||||
if description == 'Der gewünschte Beitrag ist leider nicht mehr vorhanden.':
|
||||
raise ExtractorError(f'Video {video_id} is no longer available', expected=True)
|
||||
|
||||
media_collection_url = self._search_regex(
|
||||
r'data-mediacollection-ardplayer="([^"]+)"', webpage, 'media collection url')
|
||||
info = self._extract_media_info(media_collection_url, webpage, video_id)
|
||||
info.update({
|
||||
player_url = traverse_obj(webpage, (
|
||||
{find_element(tag='div', id=f'player{video_id}', html=True)},
|
||||
{extract_attributes}, 'data-mediacollection-ardplayer',
|
||||
{self._proto_relative_url}, {require('player URL')}))
|
||||
article = traverse_obj(webpage, (
|
||||
{find_element(cls='article__content')},
|
||||
{find_element(tag='p')}, {clean_html}))
|
||||
|
||||
return {
|
||||
**self._extract_media_info(player_url, webpage, video_id),
|
||||
'id': video_id,
|
||||
'title': get_element_by_attribute('class', 'ardplayer-title', webpage),
|
||||
'description': self._og_search_description(webpage),
|
||||
'title': traverse_obj(webpage, (
|
||||
{find_element(cls='ardplayer-title')}, {clean_html})),
|
||||
'channel': traverse_obj(webpage, (
|
||||
{find_element(cls=f'{self._CLS_COMMON}subheadline')},
|
||||
{lambda x: x.split('|')[0]}, {clean_html})),
|
||||
'description': description,
|
||||
'duration': parse_duration(self._search_regex(
|
||||
r'(\d{2}:\d{2}:\d{2})', article, 'duration')),
|
||||
'release_date': unified_strdate(self._search_regex(
|
||||
r'(\d{2}\.\d{2}\.\d{4})', article, 'release_date')),
|
||||
'series': traverse_obj(webpage, (
|
||||
{find_element(cls=f'{self._CLS_COMMON}headline')}, {clean_html})),
|
||||
'series_id': traverse_obj(webpage, (
|
||||
{find_element(cls='teaser__link', html=True)},
|
||||
{extract_attributes}, 'href', {parse_qs}, 'sen', ..., {str}, any)),
|
||||
'thumbnail': self._og_search_thumbnail(webpage),
|
||||
})
|
||||
return info
|
||||
}
|
||||
|
@ -4,6 +4,7 @@ from .wrestleuniverse import WrestleUniverseBaseIE
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
url_basename,
|
||||
url_or_none,
|
||||
)
|
||||
|
||||
@ -65,9 +66,19 @@ class StacommuBaseIE(WrestleUniverseBaseIE):
|
||||
hls_info, decrypt = self._call_encrypted_api(
|
||||
video_id, ':watchArchive', 'stream information', data={'method': 1})
|
||||
|
||||
formats = self._get_formats(hls_info, ('hls', 'urls', ..., {url_or_none}), video_id)
|
||||
for f in formats:
|
||||
# bitrates are exaggerated in PPV playlists, so avoid wrong/huge filesize_approx values
|
||||
if f.get('tbr'):
|
||||
f['tbr'] = int(f['tbr'] / 2.5)
|
||||
# prefer variants with the same basename as the master playlist to avoid partial streams
|
||||
f['format_id'] = url_basename(f['url']).partition('.')[0]
|
||||
if not f['format_id'].startswith(url_basename(f['manifest_url']).partition('.')[0]):
|
||||
f['preference'] = -10
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': self._get_formats(hls_info, ('hls', 'urls', ..., {url_or_none}), video_id),
|
||||
'formats': formats,
|
||||
'hls_aes': self._extract_hls_key(hls_info, 'hls', decrypt),
|
||||
**traverse_obj(video_info, {
|
||||
'title': ('displayName', {str}),
|
||||
|
@ -1,76 +1,76 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import int_or_none, urljoin
|
||||
from .youtube import YoutubeIE
|
||||
from ..utils import (
|
||||
clean_html,
|
||||
parse_iso8601,
|
||||
update_url,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import subs_list_to_dict, traverse_obj
|
||||
|
||||
|
||||
class StarTrekIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'(?P<base>https?://(?:intl|www)\.startrek\.com)/videos/(?P<id>[^/]+)'
|
||||
IE_NAME = 'startrek'
|
||||
IE_DESC = 'STAR TREK'
|
||||
_VALID_URL = r'https?://(?:www\.)?startrek\.com(?:/en-(?:ca|un))?/videos/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://intl.startrek.com/videos/watch-welcoming-jess-bush-to-the-ready-room',
|
||||
'md5': '491df5035c9d4dc7f63c79caaf9c839e',
|
||||
'url': 'https://www.startrek.com/en-un/videos/official-trailer-star-trek-lower-decks-season-4',
|
||||
'info_dict': {
|
||||
'id': 'watch-welcoming-jess-bush-to-the-ready-room',
|
||||
'id': 'official-trailer-star-trek-lower-decks-season-4',
|
||||
'ext': 'mp4',
|
||||
'title': 'WATCH: Welcoming Jess Bush to The Ready Room',
|
||||
'duration': 1888,
|
||||
'timestamp': 1655388000,
|
||||
'upload_date': '20220616',
|
||||
'description': 'md5:1ffee884e3920afbdd6dd04e926a1221',
|
||||
'thumbnail': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/styles/video_1920x1080/public/images/2022-06/pp_14794_rr_thumb_107_yt_16x9\.jpg(?:\?.+)?',
|
||||
'subtitles': {'en-US': [{
|
||||
'url': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/video/captions/2022-06/TRR_SNW_107_v4\.vtt',
|
||||
}, {
|
||||
'url': 'https://media.startrek.com/2022/06/16/2043801155561/1069981_hls/trr_snw_107_v4-c4bfc25d/stream_vtt.m3u8',
|
||||
}]},
|
||||
'title': 'Official Trailer | Star Trek: Lower Decks - Season 4',
|
||||
'alt_title': 'md5:dd7e3191aaaf9e95db16fc3abd5ef68b',
|
||||
'categories': ['TRAILERS'],
|
||||
'description': 'md5:563d7856ddab99bee7a5e50f45531757',
|
||||
'release_date': '20230722',
|
||||
'release_timestamp': 1690033200,
|
||||
'series': 'Star Trek: Lower Decks',
|
||||
'series_id': 'star-trek-lower-decks',
|
||||
'thumbnail': r're:https?://.+\.(?:jpg|png)',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.startrek.com/videos/watch-ethan-peck-and-gia-sandhu-beam-down-to-the-ready-room',
|
||||
'md5': 'f5ad74fbb86e91e0882fc0a333178d1d',
|
||||
'url': 'https://www.startrek.com/en-ca/videos/my-first-contact-senator-cory-booker',
|
||||
'info_dict': {
|
||||
'id': 'watch-ethan-peck-and-gia-sandhu-beam-down-to-the-ready-room',
|
||||
'id': 'my-first-contact-senator-cory-booker',
|
||||
'ext': 'mp4',
|
||||
'title': 'WATCH: Ethan Peck and Gia Sandhu Beam Down to The Ready Room',
|
||||
'duration': 1986,
|
||||
'timestamp': 1654221600,
|
||||
'upload_date': '20220603',
|
||||
'description': 'md5:b3aa0edacfe119386567362dec8ed51b',
|
||||
'thumbnail': r're:https://www\.startrek\.com/sites/default/files/styles/video_1920x1080/public/images/2022-06/pp_14792_rr_thumb_105_yt_16x9_1.jpg(?:\?.+)?',
|
||||
'subtitles': {'en-US': [{
|
||||
'url': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/video/captions/2022-06/TRR_SNW_105_v5\.vtt',
|
||||
}]},
|
||||
'title': 'My First Contact: Senator Cory Booker',
|
||||
'alt_title': 'md5:fe74a8bdb0afab421c6e159a7680db4d',
|
||||
'categories': ['MY FIRST CONTACT'],
|
||||
'description': 'md5:a3992ab3b3e0395925d71156bbc018ce',
|
||||
'release_date': '20250401',
|
||||
'release_timestamp': 1743512400,
|
||||
'series': 'Star Trek: The Original Series',
|
||||
'series_id': 'star-trek-the-original-series',
|
||||
'thumbnail': r're:https?://.+\.(?:jpg|png)',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
urlbase, video_id = self._match_valid_url(url).group('base', 'id')
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
player = self._search_regex(
|
||||
r'(<\s*div\s+id\s*=\s*"cvp-player-[^<]+<\s*/div\s*>)', webpage, 'player')
|
||||
page_props = self._search_nextjs_data(webpage, video_id)['props']['pageProps']
|
||||
video_data = page_props['video']['data']
|
||||
if youtube_id := video_data.get('youtube_video_id'):
|
||||
return self.url_result(youtube_id, YoutubeIE)
|
||||
|
||||
hls = self._html_search_regex(r'\bdata-hls\s*=\s*"([^"]+)"', player, 'HLS URL')
|
||||
formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls, video_id, 'mp4')
|
||||
|
||||
captions = self._html_search_regex(
|
||||
r'\bdata-captions-url\s*=\s*"([^"]+)"', player, 'captions URL', fatal=False)
|
||||
if captions:
|
||||
subtitles.setdefault('en-US', [])[:0] = [{'url': urljoin(urlbase, captions)}]
|
||||
|
||||
# NB: Most of the data in the json_ld is undesirable
|
||||
json_ld = self._search_json_ld(webpage, video_id, fatal=False)
|
||||
series_id = traverse_obj(video_data, (
|
||||
'series_and_movies', ..., 'series_or_movie', 'slug', {str}, any))
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'title': self._html_search_regex(
|
||||
r'\bdata-title\s*=\s*"([^"]+)"', player, 'title', json_ld.get('title')),
|
||||
'description': self._html_search_regex(
|
||||
r'(?s)<\s*div\s+class\s*=\s*"header-body"\s*>(.+?)<\s*/div\s*>',
|
||||
webpage, 'description', fatal=False),
|
||||
'duration': int_or_none(self._html_search_regex(
|
||||
r'\bdata-duration\s*=\s*"(\d+)"', player, 'duration', fatal=False)),
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
'thumbnail': urljoin(urlbase, self._html_search_regex(
|
||||
r'\bdata-poster-url\s*=\s*"([^"]+)"', player, 'thumbnail', fatal=False)),
|
||||
'timestamp': json_ld.get('timestamp'),
|
||||
'series': traverse_obj(page_props, (
|
||||
'queried', 'header', 'tab3', 'slices', ..., 'items',
|
||||
lambda _, v: v['link']['slug'] == series_id, 'link_copy', {str}, any)),
|
||||
'series_id': series_id,
|
||||
**traverse_obj(video_data, {
|
||||
'title': ('title', ..., 'text', {clean_html}, any),
|
||||
'alt_title': ('subhead', ..., 'text', {clean_html}, any),
|
||||
'categories': ('category', 'data', 'category_name', {str.upper}, filter, all),
|
||||
'description': ('slices', ..., 'primary', 'content', ..., 'text', {clean_html}, any),
|
||||
'release_timestamp': ('published', {parse_iso8601}),
|
||||
'subtitles': ({'url': 'legacy_subtitle_file'}, all, {subs_list_to_dict(lang='en')}),
|
||||
'thumbnail': ('poster_frame', 'url', {url_or_none}, {update_url(query=None)}),
|
||||
'url': ('legacy_video_url', {url_or_none}),
|
||||
}),
|
||||
}
|
||||
|
@ -6,10 +6,13 @@ from ..utils import (
|
||||
determine_ext,
|
||||
dict_get,
|
||||
int_or_none,
|
||||
traverse_obj,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
)
|
||||
from ..utils.traversal import (
|
||||
require,
|
||||
traverse_obj,
|
||||
)
|
||||
|
||||
|
||||
class SVTBaseIE(InfoExtractor):
|
||||
@ -97,40 +100,8 @@ class SVTBaseIE(InfoExtractor):
|
||||
}
|
||||
|
||||
|
||||
class SVTIE(SVTBaseIE):
|
||||
_VALID_URL = r'https?://(?:www\.)?svt\.se/wd\?(?:.*?&)?widgetId=(?P<widget_id>\d+)&.*?\barticleId=(?P<id>\d+)'
|
||||
_EMBED_REGEX = [rf'(?:<iframe src|href)="(?P<url>{_VALID_URL}[^"]*)"']
|
||||
_TEST = {
|
||||
'url': 'http://www.svt.se/wd?widgetId=23991§ionId=541&articleId=2900353&type=embed&contextSectionId=123&autostart=false',
|
||||
'md5': '33e9a5d8f646523ce0868ecfb0eed77d',
|
||||
'info_dict': {
|
||||
'id': '2900353',
|
||||
'ext': 'mp4',
|
||||
'title': 'Stjärnorna skojar till det - under SVT-intervjun',
|
||||
'duration': 27,
|
||||
'age_limit': 0,
|
||||
},
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
widget_id = mobj.group('widget_id')
|
||||
article_id = mobj.group('id')
|
||||
|
||||
info = self._download_json(
|
||||
f'http://www.svt.se/wd?widgetId={widget_id}&articleId={article_id}&format=json&type=embed&output=json',
|
||||
article_id)
|
||||
|
||||
info_dict = self._extract_video(info['video'], article_id)
|
||||
info_dict['title'] = info['context']['title']
|
||||
return info_dict
|
||||
|
||||
|
||||
class SVTPlayBaseIE(SVTBaseIE):
|
||||
_SVTPLAY_RE = r'root\s*\[\s*(["\'])_*svtplay\1\s*\]\s*=\s*(?P<json>{.+?})\s*;\s*\n'
|
||||
|
||||
|
||||
class SVTPlayIE(SVTPlayBaseIE):
|
||||
class SVTPlayIE(SVTBaseIE):
|
||||
IE_NAME = 'svt:play'
|
||||
IE_DESC = 'SVT Play and Öppet arkiv'
|
||||
_VALID_URL = r'''(?x)
|
||||
(?:
|
||||
@ -173,6 +144,7 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||
'ext': 'mp4',
|
||||
'title': '1. Farlig kryssning',
|
||||
'timestamp': 1491019200,
|
||||
'description': 'md5:8f350bc605677a5ead36a19a62fd9a34',
|
||||
'upload_date': '20170401',
|
||||
'duration': 2566,
|
||||
'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
|
||||
@ -186,19 +158,21 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||
'params': {
|
||||
'skip_download': 'm3u8',
|
||||
},
|
||||
'expected_warnings': [r'Failed to download (?:MPD|m3u8)'],
|
||||
}, {
|
||||
'url': 'https://www.svtplay.se/video/jz2rYz7/anders-hansen-moter/james-fallon?info=visa',
|
||||
'info_dict': {
|
||||
'id': 'jvXAGVb',
|
||||
'ext': 'mp4',
|
||||
'title': 'James Fallon',
|
||||
'timestamp': 1673917200,
|
||||
'upload_date': '20230117',
|
||||
'description': r're:James Fallon är hjärnforskaren .{532} att upptäcka psykopati tidigt\?$',
|
||||
'timestamp': 1743379200,
|
||||
'upload_date': '20250331',
|
||||
'duration': 1081,
|
||||
'thumbnail': r're:^https?://(?:.*[\.-]jpg|www.svtstatic.se/image/.*)$',
|
||||
'age_limit': 0,
|
||||
'episode': 'James Fallon',
|
||||
'series': 'Anders Hansen möter...',
|
||||
'series': 'Anders Hansen möter',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': 'dash',
|
||||
@ -233,96 +207,75 @@ class SVTPlayIE(SVTPlayBaseIE):
|
||||
'only_matching': True,
|
||||
}]
|
||||
|
||||
def _extract_by_video_id(self, video_id, webpage=None):
|
||||
def _extract_by_video_id(self, video_id):
|
||||
data = self._download_json(
|
||||
f'https://api.svt.se/videoplayer-api/video/{video_id}',
|
||||
video_id, headers=self.geo_verification_headers())
|
||||
info_dict = self._extract_video(data, video_id)
|
||||
|
||||
if not info_dict.get('title'):
|
||||
title = dict_get(info_dict, ('episode', 'series'))
|
||||
if not title and webpage:
|
||||
title = re.sub(
|
||||
r'\s*\|\s*.+?$', '', self._og_search_title(webpage))
|
||||
if not title:
|
||||
title = video_id
|
||||
info_dict['title'] = title
|
||||
info_dict['title'] = traverse_obj(info_dict, 'episode', 'series')
|
||||
|
||||
return info_dict
|
||||
|
||||
def _real_extract(self, url):
|
||||
mobj = self._match_valid_url(url)
|
||||
video_id = mobj.group('id')
|
||||
svt_id = mobj.group('svt_id') or mobj.group('modal_id')
|
||||
|
||||
if svt_id:
|
||||
return self._extract_by_video_id(svt_id)
|
||||
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
|
||||
data = self._parse_json(
|
||||
self._search_regex(
|
||||
self._SVTPLAY_RE, webpage, 'embedded data', default='{}',
|
||||
group='json'),
|
||||
video_id, fatal=False)
|
||||
|
||||
thumbnail = self._og_search_thumbnail(webpage)
|
||||
|
||||
if data:
|
||||
video_info = try_get(
|
||||
data, lambda x: x['context']['dispatcher']['stores']['VideoTitlePageStore']['data']['video'],
|
||||
dict)
|
||||
if video_info:
|
||||
info_dict = self._extract_video(video_info, video_id)
|
||||
info_dict.update({
|
||||
'title': data['context']['dispatcher']['stores']['MetaStore']['title'],
|
||||
'thumbnail': thumbnail,
|
||||
})
|
||||
return info_dict
|
||||
|
||||
svt_id = try_get(
|
||||
data, lambda x: x['statistics']['dataLake']['content']['id'],
|
||||
str)
|
||||
|
||||
data = traverse_obj(self._search_nextjs_data(webpage, video_id), (
|
||||
'props', 'urqlState', ..., 'data', {json.loads},
|
||||
'detailsPageByPath', {dict}, any, {require('video data')}))
|
||||
details = traverse_obj(data, (
|
||||
'modules', lambda _, v: v['details']['smartStart']['item']['videos'], 'details', any))
|
||||
svt_id = traverse_obj(details, (
|
||||
'smartStart', 'item', 'videos',
|
||||
# There can be 'AudioDescribed' and 'SignInterpreted' variants; try 'Default' or else get first
|
||||
(lambda _, v: v['accessibility'] == 'Default', 0),
|
||||
'svtId', {str}, any))
|
||||
if not svt_id:
|
||||
nextjs_data = self._search_nextjs_data(webpage, video_id, fatal=False)
|
||||
svt_id = traverse_obj(nextjs_data, (
|
||||
'props', 'urqlState', ..., 'data', {json.loads}, 'detailsPageByPath',
|
||||
'video', 'svtId', {str}), get_all=False)
|
||||
svt_id = traverse_obj(data, ('video', 'svtId', {str}, {require('SVT ID')}))
|
||||
|
||||
if not svt_id:
|
||||
svt_id = self._search_regex(
|
||||
(r'<video[^>]+data-video-id=["\']([\da-zA-Z-]+)',
|
||||
r'<[^>]+\bdata-rt=["\']top-area-play-button["\'][^>]+\bhref=["\'][^"\']*video/[\w-]+/[^"\']*\b(?:modalId|id)=([\w-]+)'),
|
||||
webpage, 'video id')
|
||||
info_dict = self._extract_by_video_id(svt_id)
|
||||
|
||||
info_dict = self._extract_by_video_id(svt_id, webpage)
|
||||
info_dict['thumbnail'] = thumbnail
|
||||
if not info_dict.get('title'):
|
||||
info_dict['title'] = re.sub(r'\s*\|\s*.+?$', '', self._og_search_title(webpage))
|
||||
if not info_dict.get('thumbnail'):
|
||||
info_dict['thumbnail'] = self._og_search_thumbnail(webpage)
|
||||
if not info_dict.get('description'):
|
||||
info_dict['description'] = traverse_obj(details, ('description', {str}))
|
||||
|
||||
return info_dict
|
||||
|
||||
|
||||
class SVTSeriesIE(SVTPlayBaseIE):
|
||||
class SVTSeriesIE(SVTBaseIE):
|
||||
IE_NAME = 'svt:play:series'
|
||||
_VALID_URL = r'https?://(?:www\.)?svtplay\.se/(?P<id>[^/?&#]+)(?:.+?\btab=(?P<season_slug>[^&#]+))?'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.svtplay.se/rederiet',
|
||||
'info_dict': {
|
||||
'id': '14445680',
|
||||
'id': 'jpmQYgn',
|
||||
'title': 'Rederiet',
|
||||
'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
|
||||
'description': 'md5:f71122f7cf2e52b643e75915e04cb83d',
|
||||
},
|
||||
'playlist_mincount': 318,
|
||||
}, {
|
||||
'url': 'https://www.svtplay.se/rederiet?tab=season-2-14445680',
|
||||
'url': 'https://www.svtplay.se/rederiet?tab=season-2-jpmQYgn',
|
||||
'info_dict': {
|
||||
'id': 'season-2-14445680',
|
||||
'id': 'season-2-jpmQYgn',
|
||||
'title': 'Rederiet - Säsong 2',
|
||||
'description': 'md5:d9fdfff17f5d8f73468176ecd2836039',
|
||||
'description': 'md5:f71122f7cf2e52b643e75915e04cb83d',
|
||||
},
|
||||
'playlist_mincount': 12,
|
||||
}]
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super().suitable(url)
|
||||
return False if SVTPlayIE.suitable(url) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_slug, season_id = self._match_valid_url(url).groups()
|
||||
@ -386,6 +339,7 @@ class SVTSeriesIE(SVTPlayBaseIE):
|
||||
|
||||
|
||||
class SVTPageIE(SVTBaseIE):
|
||||
IE_NAME = 'svt:page'
|
||||
_VALID_URL = r'https?://(?:www\.)?svt\.se/(?:[^/?#]+/)*(?P<id>[^/?&#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.svt.se/nyheter/lokalt/skane/viktor-18-forlorade-armar-och-ben-i-sepsis-vill-ateruppta-karaten-och-bli-svetsare',
|
||||
@ -463,7 +417,7 @@ class SVTPageIE(SVTBaseIE):
|
||||
|
||||
@classmethod
|
||||
def suitable(cls, url):
|
||||
return False if SVTIE.suitable(url) or SVTPlayIE.suitable(url) else super().suitable(url)
|
||||
return False if SVTPlayIE.suitable(url) else super().suitable(url)
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
|
@ -1,58 +0,0 @@
|
||||
from .adobepass import AdobePassIE
|
||||
from ..utils import (
|
||||
smuggle_url,
|
||||
update_url_query,
|
||||
)
|
||||
|
||||
|
||||
class SyfyIE(AdobePassIE):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:www\.)?syfy\.com/(?:[^/]+/)?videos/(?P<id>[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'http://www.syfy.com/theinternetruinedmylife/videos/the-internet-ruined-my-life-season-1-trailer',
|
||||
'info_dict': {
|
||||
'id': '2968097',
|
||||
'ext': 'mp4',
|
||||
'title': 'The Internet Ruined My Life: Season 1 Trailer',
|
||||
'description': 'One tweet, one post, one click, can destroy everything.',
|
||||
'uploader': 'NBCU-MPAT',
|
||||
'upload_date': '20170113',
|
||||
'timestamp': 1484345640,
|
||||
},
|
||||
'params': {
|
||||
# m3u8 download
|
||||
'skip_download': True,
|
||||
},
|
||||
'add_ie': ['ThePlatform'],
|
||||
'skip': 'Redirects to main page',
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
syfy_mpx = next(iter(self._parse_json(self._search_regex(
|
||||
r'jQuery\.extend\(Drupal\.settings\s*,\s*({.+?})\);', webpage, 'drupal settings'),
|
||||
display_id)['syfy']['syfy_mpx'].values()))
|
||||
video_id = syfy_mpx['mpxGUID']
|
||||
title = syfy_mpx['episodeTitle']
|
||||
query = {
|
||||
'mbr': 'true',
|
||||
'manifest': 'm3u',
|
||||
}
|
||||
if syfy_mpx.get('entitlement') == 'auth':
|
||||
resource = self._get_mvpd_resource(
|
||||
'syfy', title, video_id,
|
||||
syfy_mpx.get('mpxRating', 'TV-14'))
|
||||
query['auth'] = self._extract_mvpd_auth(
|
||||
url, video_id, 'syfy', resource)
|
||||
|
||||
return {
|
||||
'_type': 'url_transparent',
|
||||
'ie_key': 'ThePlatform',
|
||||
'url': smuggle_url(update_url_query(
|
||||
self._proto_relative_url(syfy_mpx['releaseURL']), query),
|
||||
{'force_smil_url': True}),
|
||||
'title': title,
|
||||
'id': video_id,
|
||||
'display_id': display_id,
|
||||
}
|
@ -32,6 +32,10 @@ class TBSIE(TurnerBaseIE):
|
||||
'url': 'http://www.tntdrama.com/movies/star-wars-a-new-hope',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_SOFTWARE_STATEMENT_MAP = {
|
||||
'tbs': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJkZTA0NTYxZS1iMTFhLTRlYTgtYTg5NC01NjI3MGM1NmM2MWIiLCJuYmYiOjE1MzcxODkzOTAsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTg5MzkwfQ.Z7ny66kaqNDdCHf9Y9KsV12LrBxrLkGGxlYe2XGm6qsw2T-k1OCKC1TMzeqiZP735292MMRAQkcJDKrMIzNbAuf9nCdIcv4kE1E2nqUnjPMBduC1bHffZp8zlllyrN2ElDwM8Vhwv_5nElLRwWGEt0Kaq6KJAMZA__WDxKWC18T-wVtsOZWXQpDqO7nByhfj2t-Z8c3TUNVsA_wHgNXlkzJCZ16F2b7yGLT5ZhLPupOScd3MXC5iPh19HSVIok22h8_F_noTmGzmMnIRQi6bWYWK2zC7TQ_MsYHfv7V6EaG5m1RKZTV6JAwwoJQF_9ByzarLV1DGwZxD9-eQdqswvg',
|
||||
'tntdrama': 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwOTMxYTU4OS1jZjEzLTRmNjMtYTJmYy03MzhjMjE1NWU5NjEiLCJuYmYiOjE1MzcxOTA4MjcsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODI3fQ.AucKvtws7oekTXi80_zX4-BlgJD9GLvlOI9FlBCjdlx7Pa3eJ0AqbogynKMiatMbnLOTMHGjd7tTiq422unmZjBz70dhePAe9BbW0dIo7oQ57vZ-VBYw_tWYRPmON61MwAbLVlqROD3n_zURs85S8TlkQx9aNx9x_riGGELjd8l05CVa_pOluNhYvuIFn6wmrASOKI1hNEblBDWh468UWP571-fe4zzi0rlYeeHd-cjvtWvOB3bQsWrUVbK4pRmqvzEH59j0vNF-ihJF9HncmUicYONe47Mib3elfMok23v4dB1_UAlQY_oawfNcynmEnJQCcqFmbHdEwTW6gMiYsA',
|
||||
}
|
||||
|
||||
def _real_extract(self, url):
|
||||
site, path, display_id = self._match_valid_url(url).groups()
|
||||
@ -48,7 +52,7 @@ class TBSIE(TurnerBaseIE):
|
||||
drupal_settings['ngtv_token_url']).query)
|
||||
|
||||
info = self._extract_ngtv_info(
|
||||
media_id, tokenizer_query, {
|
||||
media_id, tokenizer_query, self._SOFTWARE_STATEMENT_MAP[site], {
|
||||
'url': url,
|
||||
'site_name': site[:3].upper(),
|
||||
'auth_required': video_data.get('authRequired') == '1' or is_live,
|
||||
|
@ -156,6 +156,7 @@ class TeamcocoIE(TeamcocoBaseIE):
|
||||
|
||||
|
||||
class ConanClassicIE(TeamcocoBaseIE):
|
||||
_WORKING = False
|
||||
_VALID_URL = r'https?://(?:(?:www\.)?conanclassic|conan25\.teamcoco)\.com/(?P<id>([^/]+/)*[^/?#]+)'
|
||||
_TESTS = [{
|
||||
'url': 'https://conanclassic.com/video/ice-cube-kevin-hart-conan-share-lyft',
|
||||
@ -263,7 +264,7 @@ class ConanClassicIE(TeamcocoBaseIE):
|
||||
info.update(self._extract_ngtv_info(media_id, {
|
||||
'accessToken': token,
|
||||
'accessTokenType': 'jws',
|
||||
}))
|
||||
}, None)) # TODO: the None arg needs to be the AdobePass software_statement
|
||||
else:
|
||||
formats, subtitles = self._get_formats_and_subtitles(
|
||||
traverse_obj(response, ('data', 'findRecordVideoMetadata')), video_id)
|
||||
|
@ -63,6 +63,17 @@ class TelecincoBaseIE(InfoExtractor):
|
||||
'http_headers': headers,
|
||||
}
|
||||
|
||||
def _download_akamai_webpage(self, url, display_id):
|
||||
try: # yt-dlp's default user-agents are too old and blocked by akamai
|
||||
return self._download_webpage(url, display_id, headers={
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
|
||||
raise
|
||||
# Retry with impersonation if hardcoded UA is insufficient to bypass akamai
|
||||
return self._download_webpage(url, display_id, impersonate=True)
|
||||
|
||||
|
||||
class TelecincoIE(TelecincoBaseIE):
|
||||
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
||||
@ -140,7 +151,7 @@ class TelecincoIE(TelecincoBaseIE):
|
||||
|
||||
def _real_extract(self, url):
|
||||
display_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
webpage = self._download_akamai_webpage(url, display_id)
|
||||
article = self._search_json(
|
||||
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
|
||||
webpage, 'article', display_id)['article']
|
||||
|
@ -6,32 +6,32 @@ from ..utils import int_or_none, traverse_obj, url_or_none, urljoin
|
||||
|
||||
|
||||
class TenPlayIE(InfoExtractor):
|
||||
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/]+/)+(?P<id>tpv\d{6}[a-z]{5})'
|
||||
IE_NAME = '10play'
|
||||
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?:[^/?#]+/)+(?P<id>tpv\d{6}[a-z]{5})'
|
||||
_NETRC_MACHINE = '10play'
|
||||
_TESTS = [{
|
||||
'url': 'https://10play.com.au/neighbours/web-extras/season-41/heres-a-first-look-at-mischa-bartons-neighbours-debut/tpv230911hyxnz',
|
||||
# Geo-restricted to Australia
|
||||
'url': 'https://10play.com.au/australian-survivor/web-extras/season-10-brains-v-brawn-ii/myless-journey/tpv250414jdmtf',
|
||||
'info_dict': {
|
||||
'id': '6336940246112',
|
||||
'id': '7440980000013868',
|
||||
'ext': 'mp4',
|
||||
'title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
|
||||
'alt_title': 'Here\'s A First Look At Mischa Barton\'s Neighbours Debut',
|
||||
'description': 'Neighbours Premieres Monday, September 18 At 4:30pm On 10 And 10 Play And 6:30pm On 10 Peach',
|
||||
'duration': 74,
|
||||
'season': 'Season 41',
|
||||
'season_number': 41,
|
||||
'series': 'Neighbours',
|
||||
'thumbnail': r're:https://.*\.jpg',
|
||||
'title': 'Myles\'s Journey',
|
||||
'alt_title': 'Myles\'s Journey',
|
||||
'description': 'Relive Myles\'s epic Brains V Brawn II journey to reach the game\'s final two',
|
||||
'uploader': 'Channel 10',
|
||||
'age_limit': 15,
|
||||
'timestamp': 1694386800,
|
||||
'upload_date': '20230910',
|
||||
'uploader_id': '2199827728001',
|
||||
'age_limit': 15,
|
||||
'duration': 249,
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'series': 'Australian Survivor',
|
||||
'season': 'Season 10',
|
||||
'season_number': 10,
|
||||
'timestamp': 1744629420,
|
||||
'upload_date': '20250414',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
},
|
||||
'skip': 'Only available in Australia',
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# Geo-restricted to Australia
|
||||
'url': 'https://10play.com.au/neighbours/episodes/season-42/episode-9107/tpv240902nzqyp',
|
||||
'info_dict': {
|
||||
'id': '9000000000091177',
|
||||
@ -45,17 +45,38 @@ class TenPlayIE(InfoExtractor):
|
||||
'season': 'Season 42',
|
||||
'season_number': 42,
|
||||
'series': 'Neighbours',
|
||||
'thumbnail': r're:https://.*\.jpg',
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'age_limit': 15,
|
||||
'timestamp': 1725517860,
|
||||
'upload_date': '20240905',
|
||||
'uploader': 'Channel 10',
|
||||
'uploader_id': '2199827728001',
|
||||
},
|
||||
'params': {
|
||||
'skip_download': True,
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
}, {
|
||||
# Geo-restricted to Australia; upgrading the m3u8 quality fails and we need the fallback
|
||||
'url': 'https://10play.com.au/tiny-chef-show/episodes/season-1/episode-2/tpv240228pofvt',
|
||||
'info_dict': {
|
||||
'id': '9000000000084116',
|
||||
'ext': 'mp4',
|
||||
'uploader': 'Channel 10',
|
||||
'uploader_id': '2199827728001',
|
||||
'duration': 1297,
|
||||
'title': 'The Tiny Chef Show - S1 Ep. 2',
|
||||
'alt_title': 'S1 Ep. 2 - Popcorn/banana',
|
||||
'description': 'md5:d4758b52b5375dfaa67a78261dcb5763',
|
||||
'age_limit': 0,
|
||||
'series': 'The Tiny Chef Show',
|
||||
'season_number': 1,
|
||||
'episode_number': 2,
|
||||
'timestamp': 1747957740,
|
||||
'thumbnail': r're:https://.+/.+\.jpg',
|
||||
'upload_date': '20250522',
|
||||
'season': 'Season 1',
|
||||
'episode': 'Episode 2',
|
||||
},
|
||||
'skip': 'Only available in Australia',
|
||||
'params': {'skip_download': 'm3u8'},
|
||||
'expected_warnings': ['Failed to download m3u8 information: HTTP Error 502'],
|
||||
}, {
|
||||
'url': 'https://10play.com.au/how-to-stay-married/web-extras/season-1/terrys-talks-ep-1-embracing-change/tpv190915ylupc',
|
||||
'only_matching': True,
|
||||
@ -86,8 +107,11 @@ class TenPlayIE(InfoExtractor):
|
||||
if '10play-not-in-oz' in m3u8_url:
|
||||
self.raise_geo_restricted(countries=['AU'])
|
||||
# Attempt to get a higher quality stream
|
||||
m3u8_url = m3u8_url.replace(',150,75,55,0000', ',300,150,75,55,0000')
|
||||
formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
|
||||
formats = self._extract_m3u8_formats(
|
||||
m3u8_url.replace(',150,75,55,0000', ',300,150,75,55,0000'),
|
||||
content_id, 'mp4', fatal=False)
|
||||
if not formats:
|
||||
formats = self._extract_m3u8_formats(m3u8_url, content_id, 'mp4')
|
||||
|
||||
return {
|
||||
'id': content_id,
|
||||
@ -112,21 +136,22 @@ class TenPlayIE(InfoExtractor):
|
||||
|
||||
|
||||
class TenPlaySeasonIE(InfoExtractor):
|
||||
IE_NAME = '10play:season'
|
||||
_VALID_URL = r'https?://(?:www\.)?10play\.com\.au/(?P<show>[^/?#]+)/episodes/(?P<season>[^/?#]+)/?(?:$|[?#])'
|
||||
_TESTS = [{
|
||||
'url': 'https://10play.com.au/masterchef/episodes/season-14',
|
||||
'url': 'https://10play.com.au/masterchef/episodes/season-15',
|
||||
'info_dict': {
|
||||
'title': 'Season 14',
|
||||
'id': 'MjMyOTIy',
|
||||
'title': 'Season 15',
|
||||
'id': 'MTQ2NjMxOQ==',
|
||||
},
|
||||
'playlist_mincount': 64,
|
||||
'playlist_mincount': 50,
|
||||
}, {
|
||||
'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2022',
|
||||
'url': 'https://10play.com.au/the-bold-and-the-beautiful-fast-tracked/episodes/season-2024',
|
||||
'info_dict': {
|
||||
'title': 'Season 2022',
|
||||
'title': 'Season 2024',
|
||||
'id': 'Mjc0OTIw',
|
||||
},
|
||||
'playlist_mincount': 256,
|
||||
'playlist_mincount': 159,
|
||||
}]
|
||||
|
||||
def _entries(self, load_more_url, display_id=None):
|
||||
|
@ -12,11 +12,13 @@ from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
mimetype2ext,
|
||||
parse_age_limit,
|
||||
parse_qs,
|
||||
traverse_obj,
|
||||
unsmuggle_url,
|
||||
update_url,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlhandle_detect_ext,
|
||||
xpath_with_ns,
|
||||
)
|
||||
@ -63,62 +65,53 @@ class ThePlatformBaseIE(AdobePassIE):
|
||||
|
||||
return formats, subtitles
|
||||
|
||||
def _download_theplatform_metadata(self, path, video_id):
|
||||
info_url = f'http://link.theplatform.{self._TP_TLD}/s/{path}?format=preview'
|
||||
return self._download_json(info_url, video_id)
|
||||
def _download_theplatform_metadata(self, path, video_id, fatal=True):
|
||||
return self._download_json(
|
||||
f'https://link.theplatform.{self._TP_TLD}/s/{path}', video_id,
|
||||
fatal=fatal, query={'format': 'preview'}) or {}
|
||||
|
||||
def _parse_theplatform_metadata(self, info):
|
||||
subtitles = {}
|
||||
captions = info.get('captions')
|
||||
if isinstance(captions, list):
|
||||
for caption in captions:
|
||||
lang, src, mime = caption.get('lang', 'en'), caption.get('src'), caption.get('type')
|
||||
subtitles.setdefault(lang, []).append({
|
||||
'ext': mimetype2ext(mime),
|
||||
'url': src,
|
||||
})
|
||||
@staticmethod
|
||||
def _parse_theplatform_metadata(tp_metadata):
|
||||
def site_specific_filter(*fields):
|
||||
return lambda k, v: v and k.endswith(tuple(f'${f}' for f in fields))
|
||||
|
||||
duration = info.get('duration')
|
||||
tp_chapters = info.get('chapters', [])
|
||||
chapters = []
|
||||
if tp_chapters:
|
||||
def _add_chapter(start_time, end_time):
|
||||
start_time = float_or_none(start_time, 1000)
|
||||
end_time = float_or_none(end_time, 1000)
|
||||
if start_time is None or end_time is None:
|
||||
return
|
||||
chapters.append({
|
||||
'start_time': start_time,
|
||||
'end_time': end_time,
|
||||
})
|
||||
info = traverse_obj(tp_metadata, {
|
||||
'title': ('title', {str}),
|
||||
'episode': ('title', {str}),
|
||||
'description': ('description', {str}),
|
||||
'thumbnail': ('defaultThumbnailUrl', {url_or_none}),
|
||||
'duration': ('duration', {float_or_none(scale=1000)}),
|
||||
'timestamp': ('pubDate', {float_or_none(scale=1000)}),
|
||||
'uploader': ('billingCode', {str}),
|
||||
'creators': ('author', {str}, filter, all, filter),
|
||||
'categories': (
|
||||
'categories', lambda _, v: v.get('label') in ['category', None],
|
||||
'name', {str}, filter, all, filter),
|
||||
'tags': ('keywords', {str}, filter, {lambda x: re.split(r'[;,]\s?', x)}, filter),
|
||||
'age_limit': ('ratings', ..., 'rating', {parse_age_limit}, any),
|
||||
'season_number': (site_specific_filter('seasonNumber'), {int_or_none}, any),
|
||||
'episode_number': (site_specific_filter('episodeNumber', 'airOrder'), {int_or_none}, any),
|
||||
'series': (site_specific_filter('show', 'seriesTitle', 'seriesShortTitle'), (None, ...), {str}, any),
|
||||
'location': (site_specific_filter('region'), {str}, any),
|
||||
'media_type': (site_specific_filter('programmingType', 'type'), {str}, any),
|
||||
})
|
||||
|
||||
for chapter in tp_chapters[:-1]:
|
||||
_add_chapter(chapter.get('startTime'), chapter.get('endTime'))
|
||||
_add_chapter(tp_chapters[-1].get('startTime'), tp_chapters[-1].get('endTime') or duration)
|
||||
chapters = traverse_obj(tp_metadata, ('chapters', ..., {
|
||||
'start_time': ('startTime', {float_or_none(scale=1000)}),
|
||||
'end_time': ('endTime', {float_or_none(scale=1000)}),
|
||||
}))
|
||||
# Ignore pointless single chapters from short videos that span the entire video's duration
|
||||
if len(chapters) > 1 or traverse_obj(chapters, (0, 'end_time')):
|
||||
info['chapters'] = chapters
|
||||
|
||||
def extract_site_specific_field(field):
|
||||
# A number of sites have custom-prefixed keys, e.g. 'cbc$seasonNumber'
|
||||
return traverse_obj(info, lambda k, v: v and k.endswith(f'${field}'), get_all=False)
|
||||
info['subtitles'] = {}
|
||||
for caption in traverse_obj(tp_metadata, ('captions', lambda _, v: url_or_none(v['src']))):
|
||||
info['subtitles'].setdefault(caption.get('lang') or 'en', []).append({
|
||||
'url': caption['src'],
|
||||
'ext': mimetype2ext(caption.get('type')),
|
||||
})
|
||||
|
||||
return {
|
||||
'title': info['title'],
|
||||
'subtitles': subtitles,
|
||||
'description': info['description'],
|
||||
'thumbnail': info['defaultThumbnailUrl'],
|
||||
'duration': float_or_none(duration, 1000),
|
||||
'timestamp': int_or_none(info.get('pubDate'), 1000) or None,
|
||||
'uploader': info.get('billingCode'),
|
||||
'chapters': chapters,
|
||||
'creator': traverse_obj(info, ('author', {str})) or None,
|
||||
'categories': traverse_obj(info, (
|
||||
'categories', lambda _, v: v.get('label') in ('category', None), 'name', {str})) or None,
|
||||
'tags': traverse_obj(info, ('keywords', {lambda x: re.split(r'[;,]\s?', x) if x else None})),
|
||||
'location': extract_site_specific_field('region'),
|
||||
'series': extract_site_specific_field('show') or extract_site_specific_field('seriesTitle'),
|
||||
'season_number': int_or_none(extract_site_specific_field('seasonNumber')),
|
||||
'episode_number': int_or_none(extract_site_specific_field('episodeNumber')),
|
||||
'media_type': extract_site_specific_field('programmingType') or extract_site_specific_field('type'),
|
||||
}
|
||||
return info
|
||||
|
||||
def _extract_theplatform_metadata(self, path, video_id):
|
||||
info = self._download_theplatform_metadata(path, video_id)
|
||||
|
121
yt_dlp/extractor/toutiao.py
Normal file
121
yt_dlp/extractor/toutiao.py
Normal file
@ -0,0 +1,121 @@
|
||||
import json
|
||||
import urllib.parse
|
||||
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
str_or_none,
|
||||
try_call,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class ToutiaoIE(InfoExtractor):
|
||||
IE_NAME = 'toutiao'
|
||||
IE_DESC = '今日头条'
|
||||
|
||||
_VALID_URL = r'https?://www\.toutiao\.com/video/(?P<id>\d+)/?(?:[?#]|$)'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.toutiao.com/video/7505382061495176511/',
|
||||
'info_dict': {
|
||||
'id': '7505382061495176511',
|
||||
'ext': 'mp4',
|
||||
'title': '新疆多地现不明飞行物,目击者称和月亮一样亮,几秒内突然加速消失,气象部门回应',
|
||||
'comment_count': int,
|
||||
'duration': 9.753,
|
||||
'like_count': int,
|
||||
'release_date': '20250517',
|
||||
'release_timestamp': 1747483344,
|
||||
'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
|
||||
'uploader': '极目新闻',
|
||||
'uploader_id': 'MS4wLjABAAAAeateBb9Su8I3MJOZozmvyzWktmba5LMlliRDz1KffnM',
|
||||
'view_count': int,
|
||||
},
|
||||
}, {
|
||||
'url': 'https://www.toutiao.com/video/7479446610359878153/',
|
||||
'info_dict': {
|
||||
'id': '7479446610359878153',
|
||||
'ext': 'mp4',
|
||||
'title': '小伙竟然利用两块磁铁制作成磁力减震器,简直太有创意了!',
|
||||
'comment_count': int,
|
||||
'duration': 118.374,
|
||||
'like_count': int,
|
||||
'release_date': '20250308',
|
||||
'release_timestamp': 1741444368,
|
||||
'thumbnail': r're:https?://p\d+-sign\.toutiaoimg\.com/.+$',
|
||||
'uploader': '小莉创意发明',
|
||||
'uploader_id': 'MS4wLjABAAAA4f7d4mwtApALtHIiq-QM20dwXqe32NUz0DeWF7wbHKw',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_initialize(self):
|
||||
if self._get_cookies('https://www.toutiao.com').get('ttwid'):
|
||||
return
|
||||
|
||||
urlh = self._request_webpage(
|
||||
'https://ttwid.bytedance.com/ttwid/union/register/', None,
|
||||
'Fetching ttwid', 'Unable to fetch ttwid', headers={
|
||||
'Content-Type': 'application/json',
|
||||
}, data=json.dumps({
|
||||
'aid': 24,
|
||||
'needFid': False,
|
||||
'region': 'cn',
|
||||
'service': 'www.toutiao.com',
|
||||
'union': True,
|
||||
}).encode(),
|
||||
)
|
||||
|
||||
if ttwid := try_call(lambda: self._get_cookies(urlh.url)['ttwid'].value):
|
||||
self._set_cookie('.toutiao.com', 'ttwid', ttwid)
|
||||
return
|
||||
|
||||
self.raise_login_required()
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
webpage = self._download_webpage(url, video_id)
|
||||
video_data = traverse_obj(webpage, (
|
||||
{find_element(tag='script', id='RENDER_DATA')},
|
||||
{urllib.parse.unquote}, {json.loads}, 'data', 'initialVideo',
|
||||
))
|
||||
|
||||
formats = []
|
||||
for video in traverse_obj(video_data, (
|
||||
'videoPlayInfo', 'video_list', lambda _, v: v['main_url'],
|
||||
)):
|
||||
formats.append({
|
||||
'url': video['main_url'],
|
||||
**traverse_obj(video, ('video_meta', {
|
||||
'acodec': ('audio_profile', {str}),
|
||||
'asr': ('audio_sample_rate', {int_or_none}),
|
||||
'audio_channels': ('audio_channels', {float_or_none}, {int_or_none}),
|
||||
'ext': ('vtype', {str}),
|
||||
'filesize': ('size', {int_or_none}),
|
||||
'format_id': ('definition', {str}),
|
||||
'fps': ('fps', {int_or_none}),
|
||||
'height': ('vheight', {int_or_none}),
|
||||
'tbr': ('real_bitrate', {float_or_none(scale=1000)}),
|
||||
'vcodec': ('codec_type', {str}),
|
||||
'width': ('vwidth', {int_or_none}),
|
||||
})),
|
||||
})
|
||||
|
||||
return {
|
||||
'id': video_id,
|
||||
'formats': formats,
|
||||
**traverse_obj(video_data, {
|
||||
'comment_count': ('commentCount', {int_or_none}),
|
||||
'duration': ('videoPlayInfo', 'video_duration', {float_or_none}),
|
||||
'like_count': ('repinCount', {int_or_none}),
|
||||
'release_timestamp': ('publishTime', {int_or_none}),
|
||||
'thumbnail': (('poster', 'coverUrl'), {url_or_none}, any),
|
||||
'title': ('title', {str}),
|
||||
'uploader': ('userInfo', 'name', {str}),
|
||||
'uploader_id': ('userInfo', 'userId', {str_or_none}),
|
||||
'view_count': ('playCount', {int_or_none}),
|
||||
'webpage_url': ('detailUrl', {url_or_none}),
|
||||
}),
|
||||
}
|
@ -20,6 +20,7 @@ class TruTVIE(TurnerBaseIE):
|
||||
'skip_download': True,
|
||||
},
|
||||
}
|
||||
_SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiJhYzQyOTkwMi0xMDYzLTQyNTQtYWJlYS1iZTY2ODM4MTVmZGIiLCJuYmYiOjE1MzcxOTA4NjgsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNTM3MTkwODY4fQ.ewXl5LDMDvvx3nDXV4jCdSwUq_sOluKoOVsIjznAo6Zo4zrGe9rjlZ9DOmQKW66g6VRMexJsJ5vM1EkY8TC5-YcQw_BclK1FPGO1rH3Wf7tX_l0b1BVbSJQKIj9UgqDp_QbGcBXz24kN4So3U22mhs6di9PYyyfG68ccKL2iRprcVKWCslIHwUF-T7FaEqb0K57auilxeW1PONG2m-lIAcZ62DUwqXDWvw0CRoWI08aVVqkkhnXaSsQfLs5Ph1Pfh9Oq3g_epUm9Ss45mq6XM7gbOb5omTcKLADRKK-PJVB_JXnZnlsXbG0ttKE1cTKJ738qu7j4aipYTf-W0nKF5Q'
|
||||
|
||||
def _real_extract(self, url):
|
||||
series_slug, clip_slug, video_id = self._match_valid_url(url).groups()
|
||||
@ -39,7 +40,7 @@ class TruTVIE(TurnerBaseIE):
|
||||
title = video_data['title'].strip()
|
||||
|
||||
info = self._extract_ngtv_info(
|
||||
media_id, {}, {
|
||||
media_id, {}, self._SOFTWARE_STATEMENT, {
|
||||
'url': url,
|
||||
'site_name': 'truTV',
|
||||
'auth_required': video_data.get('isAuthRequired'),
|
||||
|
@ -22,7 +22,7 @@ class TurnerBaseIE(AdobePassIE):
|
||||
def _extract_timestamp(self, video_data):
|
||||
return int_or_none(xpath_attr(video_data, 'dateCreated', 'uts'))
|
||||
|
||||
def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, custom_tokenizer_query=None):
|
||||
def _add_akamai_spe_token(self, tokenizer_src, video_url, content_id, ap_data, software_statement, custom_tokenizer_query=None):
|
||||
secure_path = self._search_regex(r'https?://[^/]+(.+/)', video_url, 'secure path') + '*'
|
||||
token = self._AKAMAI_SPE_TOKEN_CACHE.get(secure_path)
|
||||
if not token:
|
||||
@ -34,7 +34,8 @@ class TurnerBaseIE(AdobePassIE):
|
||||
else:
|
||||
query['videoId'] = content_id
|
||||
if ap_data.get('auth_required'):
|
||||
query['accessToken'] = self._extract_mvpd_auth(ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'])
|
||||
query['accessToken'] = self._extract_mvpd_auth(
|
||||
ap_data['url'], content_id, ap_data['site_name'], ap_data['site_name'], software_statement)
|
||||
auth = self._download_xml(
|
||||
tokenizer_src, content_id, query=query)
|
||||
error_msg = xpath_text(auth, 'error/msg')
|
||||
@ -46,7 +47,7 @@ class TurnerBaseIE(AdobePassIE):
|
||||
self._AKAMAI_SPE_TOKEN_CACHE[secure_path] = token
|
||||
return video_url + '?hdnea=' + token
|
||||
|
||||
def _extract_cvp_info(self, data_src, video_id, path_data={}, ap_data={}, fatal=False):
|
||||
def _extract_cvp_info(self, data_src, video_id, software_statement, path_data={}, ap_data={}, fatal=False):
|
||||
video_data = self._download_xml(
|
||||
data_src, video_id,
|
||||
transform_source=lambda s: fix_xml_ampersands(s).strip(),
|
||||
@ -101,7 +102,7 @@ class TurnerBaseIE(AdobePassIE):
|
||||
video_url = self._add_akamai_spe_token(
|
||||
secure_path_data['tokenizer_src'],
|
||||
secure_path_data['media_src'] + video_url,
|
||||
content_id, ap_data)
|
||||
content_id, ap_data, software_statement)
|
||||
elif not re.match('https?://', video_url):
|
||||
base_path_data = path_data.get(ext, path_data.get('default', {}))
|
||||
media_src = base_path_data.get('media_src')
|
||||
@ -215,10 +216,12 @@ class TurnerBaseIE(AdobePassIE):
|
||||
'is_live': is_live,
|
||||
}
|
||||
|
||||
def _extract_ngtv_info(self, media_id, tokenizer_query, ap_data=None):
|
||||
def _extract_ngtv_info(self, media_id, tokenizer_query, software_statement, ap_data=None):
|
||||
if not isinstance(ap_data, dict):
|
||||
ap_data = {}
|
||||
is_live = ap_data.get('is_live')
|
||||
streams_data = self._download_json(
|
||||
f'http://medium.ngtv.io/media/{media_id}/tv',
|
||||
f'https://medium.ngtv.io/media/{media_id}/tv',
|
||||
media_id)['media']['tv']
|
||||
duration = None
|
||||
chapters = []
|
||||
@ -230,8 +233,8 @@ class TurnerBaseIE(AdobePassIE):
|
||||
continue
|
||||
if stream_data.get('playlistProtection') == 'spe':
|
||||
m3u8_url = self._add_akamai_spe_token(
|
||||
'http://token.ngtv.io/token/token_spe',
|
||||
m3u8_url, media_id, ap_data or {}, tokenizer_query)
|
||||
'https://token.ngtv.io/token/token_spe',
|
||||
m3u8_url, media_id, ap_data, software_statement, tokenizer_query)
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
m3u8_url, media_id, 'mp4', m3u8_id='hls', live=is_live, fatal=False))
|
||||
|
||||
|
@ -1,4 +1,5 @@
|
||||
import base64
|
||||
import hashlib
|
||||
import itertools
|
||||
import re
|
||||
|
||||
@ -16,6 +17,7 @@ from ..utils import (
|
||||
str_to_int,
|
||||
try_get,
|
||||
unified_timestamp,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
urlencode_postdata,
|
||||
urljoin,
|
||||
@ -171,6 +173,10 @@ class TwitCastingIE(InfoExtractor):
|
||||
'player': 'pc_web',
|
||||
})
|
||||
|
||||
password_params = {
|
||||
'word': hashlib.md5(video_password.encode()).hexdigest(),
|
||||
} if video_password else None
|
||||
|
||||
formats = []
|
||||
# low: 640x360, medium: 1280x720, high: 1920x1080
|
||||
qq = qualities(['low', 'medium', 'high'])
|
||||
@ -178,7 +184,7 @@ class TwitCastingIE(InfoExtractor):
|
||||
'tc-hls', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
|
||||
)):
|
||||
formats.append({
|
||||
'url': m3u8_url,
|
||||
'url': update_url_query(m3u8_url, password_params),
|
||||
'format_id': f'hls-{quality}',
|
||||
'ext': 'mp4',
|
||||
'quality': qq(quality),
|
||||
@ -192,7 +198,7 @@ class TwitCastingIE(InfoExtractor):
|
||||
'llfmp4', 'streams', {dict.items}, lambda _, v: url_or_none(v[1]),
|
||||
)):
|
||||
formats.append({
|
||||
'url': ws_url,
|
||||
'url': update_url_query(ws_url, password_params),
|
||||
'format_id': f'ws-{mode}',
|
||||
'ext': 'mp4',
|
||||
'quality': qq(mode),
|
||||
|
@ -20,7 +20,6 @@ from ..utils import (
|
||||
remove_end,
|
||||
str_or_none,
|
||||
strip_or_none,
|
||||
traverse_obj,
|
||||
truncate_string,
|
||||
try_call,
|
||||
try_get,
|
||||
@ -29,6 +28,7 @@ from ..utils import (
|
||||
url_or_none,
|
||||
xpath_text,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class TwitterBaseIE(InfoExtractor):
|
||||
@ -1596,8 +1596,8 @@ class TwitterAmplifyIE(TwitterBaseIE):
|
||||
|
||||
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
IE_NAME = 'twitter:broadcast'
|
||||
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'
|
||||
|
||||
_VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?P<type>broadcasts|events)/(?P<id>\w+)'
|
||||
_TESTS = [{
|
||||
# untitled Periscope video
|
||||
'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
|
||||
@ -1605,6 +1605,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'id': '1yNGaQLWpejGj',
|
||||
'ext': 'mp4',
|
||||
'title': 'Andrea May Sahouri - Periscope Broadcast',
|
||||
'display_id': '1yNGaQLWpejGj',
|
||||
'uploader': 'Andrea May Sahouri',
|
||||
'uploader_id': 'andreamsahouri',
|
||||
'uploader_url': 'https://twitter.com/andreamsahouri',
|
||||
@ -1612,6 +1613,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'upload_date': '20200601',
|
||||
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
|
||||
'view_count': int,
|
||||
'concurrent_view_count': int,
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
|
||||
@ -1619,6 +1622,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'id': '1ZkKzeyrPbaxv',
|
||||
'ext': 'mp4',
|
||||
'title': 'Starship | SN10 | High-Altitude Flight Test',
|
||||
'display_id': '1ZkKzeyrPbaxv',
|
||||
'uploader': 'SpaceX',
|
||||
'uploader_id': 'SpaceX',
|
||||
'uploader_url': 'https://twitter.com/SpaceX',
|
||||
@ -1626,6 +1630,8 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'upload_date': '20210303',
|
||||
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
|
||||
'view_count': int,
|
||||
'concurrent_view_count': int,
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
|
||||
@ -1633,6 +1639,7 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'id': '1OyKAVQrgzwGb',
|
||||
'ext': 'mp4',
|
||||
'title': 'Starship Flight Test',
|
||||
'display_id': '1OyKAVQrgzwGb',
|
||||
'uploader': 'SpaceX',
|
||||
'uploader_id': 'SpaceX',
|
||||
'uploader_url': 'https://twitter.com/SpaceX',
|
||||
@ -1640,21 +1647,58 @@ class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
|
||||
'upload_date': '20230420',
|
||||
'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
|
||||
'view_count': int,
|
||||
'concurrent_view_count': int,
|
||||
'live_status': 'was_live',
|
||||
},
|
||||
}, {
|
||||
'url': 'https://x.com/i/events/1910629646300762112',
|
||||
'info_dict': {
|
||||
'id': '1LyxBWDRNqyKN',
|
||||
'ext': 'mp4',
|
||||
'title': '#ガンニバル ウォッチパーティー',
|
||||
'concurrent_view_count': int,
|
||||
'display_id': '1910629646300762112',
|
||||
'live_status': 'was_live',
|
||||
'release_date': '20250423',
|
||||
'release_timestamp': 1745409000,
|
||||
'tags': ['ガンニバル'],
|
||||
'thumbnail': r're:https?://[^?#]+\.jpg\?token=',
|
||||
'timestamp': 1745403328,
|
||||
'upload_date': '20250423',
|
||||
'uploader': 'ディズニープラス公式',
|
||||
'uploader_id': 'DisneyPlusJP',
|
||||
'uploader_url': 'https://twitter.com/DisneyPlusJP',
|
||||
'view_count': int,
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
broadcast_id = self._match_id(url)
|
||||
broadcast_type, display_id = self._match_valid_url(url).group('type', 'id')
|
||||
|
||||
if broadcast_type == 'events':
|
||||
timeline = self._call_api(
|
||||
f'live_event/1/{display_id}/timeline.json', display_id)
|
||||
broadcast_id = traverse_obj(timeline, (
|
||||
'twitter_objects', 'broadcasts', ..., ('id', 'broadcast_id'),
|
||||
{str}, any, {require('broadcast ID')}))
|
||||
else:
|
||||
broadcast_id = display_id
|
||||
|
||||
broadcast = self._call_api(
|
||||
'broadcasts/show.json', broadcast_id,
|
||||
{'ids': broadcast_id})['broadcasts'][broadcast_id]
|
||||
if not broadcast:
|
||||
raise ExtractorError('Broadcast no longer exists', expected=True)
|
||||
info = self._parse_broadcast_data(broadcast, broadcast_id)
|
||||
info['title'] = broadcast.get('status') or info.get('title')
|
||||
info['uploader_id'] = broadcast.get('twitter_username') or info.get('uploader_id')
|
||||
info['uploader_url'] = format_field(broadcast, 'twitter_username', 'https://twitter.com/%s', default=None)
|
||||
info.update({
|
||||
'display_id': display_id,
|
||||
'title': broadcast.get('status') or info.get('title'),
|
||||
'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
|
||||
'uploader_url': format_field(
|
||||
broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
|
||||
})
|
||||
if info['live_status'] == 'is_upcoming':
|
||||
self.raise_no_formats('This live broadcast has not yet started', expected=True)
|
||||
return info
|
||||
|
||||
media_key = broadcast['media_key']
|
||||
|
@ -1,98 +1,53 @@
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
int_or_none,
|
||||
parse_filesize,
|
||||
parse_iso8601,
|
||||
)
|
||||
from ..utils import clean_html
|
||||
from ..utils.traversal import find_element, traverse_obj
|
||||
|
||||
|
||||
class UMGDeIE(InfoExtractor):
|
||||
_WORKING = False
|
||||
IE_NAME = 'umg:de'
|
||||
IE_DESC = 'Universal Music Deutschland'
|
||||
_VALID_URL = r'https?://(?:www\.)?universal-music\.de/[^/]+/videos/[^/?#]+-(?P<id>\d+)'
|
||||
_TEST = {
|
||||
_VALID_URL = r'https?://(?:www\.)?universal-music\.de/[^/?#]+/videos/(?P<slug>[^/?#]+-(?P<id>\d+))'
|
||||
_TESTS = [{
|
||||
'url': 'https://www.universal-music.de/sido/videos/jedes-wort-ist-gold-wert-457803',
|
||||
'md5': 'ebd90f48c80dcc82f77251eb1902634f',
|
||||
'info_dict': {
|
||||
'id': '457803',
|
||||
'ext': 'mp4',
|
||||
'title': 'Jedes Wort ist Gold wert',
|
||||
'artists': ['Sido'],
|
||||
'description': 'md5:df2dbffcff1a74e0a7c9bef4b497aeec',
|
||||
'display_id': 'jedes-wort-ist-gold-wert-457803',
|
||||
'duration': 210.0,
|
||||
'thumbnail': r're:https?://images\.universal-music\.de/img/assets/.+\.jpg',
|
||||
'timestamp': 1513591800,
|
||||
'upload_date': '20171218',
|
||||
'view_count': int,
|
||||
},
|
||||
}
|
||||
}, {
|
||||
'url': 'https://www.universal-music.de/alexander-eder/videos/der-doktor-hat-gesagt-609533',
|
||||
'info_dict': {
|
||||
'id': '609533',
|
||||
'ext': 'mp4',
|
||||
'title': 'Der Doktor hat gesagt',
|
||||
'artists': ['Alexander Eder'],
|
||||
'display_id': 'der-doktor-hat-gesagt-609533',
|
||||
'duration': 146.0,
|
||||
'thumbnail': r're:https?://images\.universal-music\.de/img/assets/.+\.jpg',
|
||||
'timestamp': 1742982100,
|
||||
'upload_date': '20250326',
|
||||
},
|
||||
}]
|
||||
|
||||
def _real_extract(self, url):
|
||||
video_id = self._match_id(url)
|
||||
video_data = self._download_json(
|
||||
'https://graphql.universal-music.de/',
|
||||
video_id, query={
|
||||
'query': '''{
|
||||
universalMusic(channel:16) {
|
||||
video(id:%s) {
|
||||
headline
|
||||
formats {
|
||||
formatId
|
||||
url
|
||||
type
|
||||
width
|
||||
height
|
||||
mimeType
|
||||
fileSize
|
||||
}
|
||||
duration
|
||||
createdDate
|
||||
}
|
||||
}
|
||||
}''' % video_id})['data']['universalMusic']['video'] # noqa: UP031
|
||||
|
||||
title = video_data['headline']
|
||||
hls_url_template = 'http://mediadelivery.universal-music-services.de/vod/mp4:autofill/storage/' + '/'.join(list(video_id)) + '/content/%s/file/playlist.m3u8'
|
||||
|
||||
thumbnails = []
|
||||
formats = []
|
||||
|
||||
def add_m3u8_format(format_id):
|
||||
formats.extend(self._extract_m3u8_formats(
|
||||
hls_url_template % format_id, video_id, 'mp4',
|
||||
'm3u8_native', m3u8_id='hls', fatal=False))
|
||||
|
||||
for f in video_data.get('formats', []):
|
||||
f_url = f.get('url')
|
||||
mime_type = f.get('mimeType')
|
||||
if not f_url or mime_type == 'application/mxf':
|
||||
continue
|
||||
fmt = {
|
||||
'url': f_url,
|
||||
'width': int_or_none(f.get('width')),
|
||||
'height': int_or_none(f.get('height')),
|
||||
'filesize': parse_filesize(f.get('fileSize')),
|
||||
}
|
||||
f_type = f.get('type')
|
||||
if f_type == 'Image':
|
||||
thumbnails.append(fmt)
|
||||
elif f_type == 'Video':
|
||||
format_id = f.get('formatId')
|
||||
if format_id:
|
||||
fmt['format_id'] = format_id
|
||||
if mime_type == 'video/mp4':
|
||||
add_m3u8_format(format_id)
|
||||
urlh = self._request_webpage(f_url, video_id, fatal=False)
|
||||
if urlh:
|
||||
first_byte = urlh.read(1)
|
||||
if first_byte not in (b'F', b'\x00'):
|
||||
continue
|
||||
formats.append(fmt)
|
||||
if not formats:
|
||||
for format_id in (867, 836, 940):
|
||||
add_m3u8_format(format_id)
|
||||
display_id, video_id = self._match_valid_url(url).group('slug', 'id')
|
||||
webpage = self._download_webpage(url, display_id)
|
||||
|
||||
return {
|
||||
**self._search_json_ld(webpage, display_id),
|
||||
'id': video_id,
|
||||
'title': title,
|
||||
'duration': int_or_none(video_data.get('duration')),
|
||||
'timestamp': parse_iso8601(video_data.get('createdDate'), ' '),
|
||||
'thumbnails': thumbnails,
|
||||
'formats': formats,
|
||||
'artists': traverse_obj(self._html_search_meta('umg-artist-screenname', webpage), (filter, all)),
|
||||
# The JSON LD description duplicates the title
|
||||
'description': traverse_obj(webpage, ({find_element(cls='_3Y0Lj')}, {clean_html})),
|
||||
'display_id': display_id,
|
||||
'formats': self._extract_m3u8_formats(
|
||||
'https://hls.universal-music.de/get', display_id, 'mp4', query={'id': video_id}),
|
||||
}
|
||||
|
@ -32,6 +32,7 @@ class ViceBaseIE(InfoExtractor):
|
||||
|
||||
|
||||
class ViceIE(ViceBaseIE, AdobePassIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'vice'
|
||||
_VALID_URL = r'https?://(?:(?:video|vms)\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/(?:video/[^/]+|embed)/(?P<id>[\da-f]{24})'
|
||||
_EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=["\'](?P<url>(?:https?:)?//video\.vice\.com/[^/]+/embed/[\da-f]{24})']
|
||||
@ -99,6 +100,7 @@ class ViceIE(ViceBaseIE, AdobePassIE):
|
||||
'url': 'https://www.viceland.com/en_us/video/thursday-march-1-2018/5a8f2d7ff1cdb332dd446ec1',
|
||||
'only_matching': True,
|
||||
}]
|
||||
_SOFTWARE_STATEMENT = 'eyJhbGciOiJSUzI1NiJ9.eyJzdWIiOiIwMTVjODBlZC04ZDcxLTQ4ZGEtOTZkZi00NzU5NjIwNzJlYTQiLCJuYmYiOjE2NjgwMTM0ODQsImlzcyI6ImF1dGguYWRvYmUuY29tIiwiaWF0IjoxNjY4MDEzNDg0fQ.CjhUnTrlh-bmYnEFHyC2Y4it5Y_Zfza1x66O4-ki5gBR7JT6aUunYI_YflXomQPACriMpObkITFz4grVaDwdd8Xp9hrQ2R0SwRBdaklkdy1_j68RqSP5PnexJIa0q_ThtOwfRBd5uGcb33nMJ9Qs92W4kVXuca0Ta-i7SJyWgXUaPDlRDdgyCL3hKj5wuM7qUIwrd9A5CMm-j3dMIBCDgw7X6TwRK65eUQe6gTWqcvL2yONHHTpmIfeOTUxGwwKFr29COOTBowm0VJ6HE08xjXCShP08Neusu-JsgkjzhkEbiDE2531EKgfAki_7WCd2JUZVsAsCusv4a1maokk6NA'
|
||||
|
||||
def _real_extract(self, url):
|
||||
locale, video_id = self._match_valid_url(url).groups()
|
||||
@ -116,7 +118,7 @@ class ViceIE(ViceBaseIE, AdobePassIE):
|
||||
resource = self._get_mvpd_resource(
|
||||
'VICELAND', title, video_id, rating)
|
||||
query['tvetoken'] = self._extract_mvpd_auth(
|
||||
url, video_id, 'VICELAND', resource)
|
||||
url, video_id, 'VICELAND', resource, self._SOFTWARE_STATEMENT)
|
||||
|
||||
# signature generation algorithm is reverse engineered from signatureGenerator in
|
||||
# webpack:///../shared/~/vice-player/dist/js/vice-player.js in
|
||||
@ -181,6 +183,7 @@ class ViceIE(ViceBaseIE, AdobePassIE):
|
||||
|
||||
|
||||
class ViceShowIE(ViceBaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'vice:show'
|
||||
_VALID_URL = r'https?://(?:video\.vice|(?:www\.)?vice(?:land|tv))\.com/(?P<locale>[^/]+)/show/(?P<id>[^/?#&]+)'
|
||||
_PAGE_SIZE = 25
|
||||
@ -221,6 +224,7 @@ class ViceShowIE(ViceBaseIE):
|
||||
|
||||
|
||||
class ViceArticleIE(ViceBaseIE):
|
||||
_WORKING = False
|
||||
IE_NAME = 'vice:article'
|
||||
_VALID_URL = r'https?://(?:www\.)?vice\.com/(?P<locale>[^/]+)/article/(?:[0-9a-z]{6}/)?(?P<id>[^?#]+)'
|
||||
|
||||
|
@ -236,7 +236,7 @@ class VimeoBaseInfoExtractor(InfoExtractor):
|
||||
for tt in (request.get('text_tracks') or []):
|
||||
subtitles.setdefault(tt['lang'], []).append({
|
||||
'ext': 'vtt',
|
||||
'url': urljoin('https://vimeo.com', tt['url']),
|
||||
'url': urljoin('https://player.vimeo.com/', tt['url']),
|
||||
})
|
||||
|
||||
thumbnails = []
|
||||
|
@ -548,21 +548,21 @@ class VKIE(VKBaseIE):
|
||||
'formats': formats,
|
||||
'subtitles': subtitles,
|
||||
**traverse_obj(mv_data, {
|
||||
'title': ('title', {unescapeHTML}),
|
||||
'title': ('title', {str}, {unescapeHTML}),
|
||||
'description': ('desc', {clean_html}, filter),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'like_count': ('likes', {int_or_none}),
|
||||
'comment_count': ('commcount', {int_or_none}),
|
||||
}),
|
||||
**traverse_obj(data, {
|
||||
'title': ('md_title', {unescapeHTML}),
|
||||
'title': ('md_title', {str}, {unescapeHTML}),
|
||||
'description': ('description', {clean_html}, filter),
|
||||
'thumbnail': ('jpg', {url_or_none}),
|
||||
'uploader': ('md_author', {unescapeHTML}),
|
||||
'uploader': ('md_author', {str}, {unescapeHTML}),
|
||||
'uploader_id': (('author_id', 'authorId'), {str_or_none}, any),
|
||||
'duration': ('duration', {int_or_none}),
|
||||
'chapters': ('time_codes', lambda _, v: isinstance(v['time'], int), {
|
||||
'title': ('text', {unescapeHTML}),
|
||||
'title': ('text', {str}, {unescapeHTML}),
|
||||
'start_time': 'time',
|
||||
}),
|
||||
}),
|
||||
|
@ -1,4 +1,5 @@
|
||||
import base64
|
||||
import functools
|
||||
import hashlib
|
||||
import hmac
|
||||
import itertools
|
||||
@ -17,99 +18,227 @@ from ..utils import (
|
||||
UserNotLive,
|
||||
float_or_none,
|
||||
int_or_none,
|
||||
join_nonempty,
|
||||
jwt_decode_hs256,
|
||||
str_or_none,
|
||||
traverse_obj,
|
||||
try_call,
|
||||
update_url_query,
|
||||
url_or_none,
|
||||
)
|
||||
from ..utils.traversal import require, traverse_obj
|
||||
|
||||
|
||||
class WeverseBaseIE(InfoExtractor):
|
||||
_NETRC_MACHINE = 'weverse'
|
||||
_ACCOUNT_API_BASE = 'https://accountapi.weverse.io/web/api'
|
||||
_ACCOUNT_API_BASE = 'https://accountapi.weverse.io'
|
||||
_CLIENT_PLATFORM = 'WEB'
|
||||
_SIGNING_KEY = b'1b9cb6378d959b45714bec49971ade22e6e24e42'
|
||||
_ACCESS_TOKEN_KEY = 'we2_access_token'
|
||||
_REFRESH_TOKEN_KEY = 'we2_refresh_token'
|
||||
_DEVICE_ID_KEY = 'we2_device_id'
|
||||
_API_HEADERS = {
|
||||
'Accept': 'application/json',
|
||||
'Origin': 'https://weverse.io',
|
||||
'Referer': 'https://weverse.io/',
|
||||
'WEV-device-Id': str(uuid.uuid4()),
|
||||
}
|
||||
_LOGIN_HINT_TMPL = (
|
||||
'You can log in using your refresh token with --username "{}" --password "REFRESH_TOKEN" '
|
||||
'(replace REFRESH_TOKEN with the actual value of the "{}" cookie found in your web browser). '
|
||||
'You can add an optional username suffix, e.g. --username "{}" , '
|
||||
'if you need to manage multiple accounts. ')
|
||||
_LOGIN_ERRORS_MAP = {
|
||||
'login_required': 'This content is only available for logged-in users. ',
|
||||
'invalid_username': '"{}" is not valid login username for this extractor. ',
|
||||
'invalid_password': (
|
||||
'Your password is not a valid refresh token. Make sure that '
|
||||
'you are passing the refresh token, and NOT the access token. '),
|
||||
'no_refresh_token': (
|
||||
'Your access token has expired and there is no refresh token available. '
|
||||
'Refresh your session/cookies in the web browser and try again. '),
|
||||
'expired_refresh_token': (
|
||||
'Your refresh token has expired. Log in to the site again using '
|
||||
'your web browser to get a new refresh token or export fresh cookies. '),
|
||||
}
|
||||
_OAUTH_PREFIX = 'oauth'
|
||||
_oauth_tokens = {}
|
||||
_device_id = None
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self._API_HEADERS.get('Authorization'):
|
||||
return
|
||||
|
||||
headers = {
|
||||
'x-acc-app-secret': '5419526f1c624b38b10787e5c10b2a7a',
|
||||
'x-acc-app-version': '3.3.6',
|
||||
'x-acc-language': 'en',
|
||||
'x-acc-service-id': 'weverse',
|
||||
'x-acc-trace-id': str(uuid.uuid4()),
|
||||
'x-clog-user-device-id': str(uuid.uuid4()),
|
||||
@property
|
||||
def _oauth_headers(self):
|
||||
return {
|
||||
**self._API_HEADERS,
|
||||
'X-ACC-APP-SECRET': '5419526f1c624b38b10787e5c10b2a7a',
|
||||
'X-ACC-SERVICE-ID': 'weverse',
|
||||
'X-ACC-TRACE-ID': str(uuid.uuid4()),
|
||||
}
|
||||
valid_username = traverse_obj(self._download_json(
|
||||
f'{self._ACCOUNT_API_BASE}/v2/signup/email/status', None, note='Checking username',
|
||||
query={'email': username}, headers=headers, expected_status=(400, 404)), 'hasPassword')
|
||||
if not valid_username:
|
||||
raise ExtractorError('Invalid username provided', expected=True)
|
||||
|
||||
headers['content-type'] = 'application/json'
|
||||
@functools.cached_property
|
||||
def _oauth_cache_key(self):
|
||||
username = self._get_login_info()[0]
|
||||
if not username:
|
||||
return 'cookies'
|
||||
return join_nonempty(self._OAUTH_PREFIX, username.partition('+')[2])
|
||||
|
||||
@property
|
||||
def _is_logged_in(self):
|
||||
return bool(self._oauth_tokens.get(self._ACCESS_TOKEN_KEY))
|
||||
|
||||
def _access_token_is_valid(self):
|
||||
response = self._download_json(
|
||||
f'{self._ACCOUNT_API_BASE}/api/v1/token/validate', None,
|
||||
'Validating access token', 'Unable to valid access token',
|
||||
expected_status=401, headers={
|
||||
**self._oauth_headers,
|
||||
'Authorization': f'Bearer {self._oauth_tokens[self._ACCESS_TOKEN_KEY]}',
|
||||
})
|
||||
return traverse_obj(response, ('expiresIn', {int}), default=0) > 60
|
||||
|
||||
def _token_is_expired(self, key):
|
||||
is_expired = jwt_decode_hs256(self._oauth_tokens[key])['exp'] - time.time() < 3600
|
||||
if key == self._REFRESH_TOKEN_KEY or not is_expired:
|
||||
return is_expired
|
||||
return not self._access_token_is_valid()
|
||||
|
||||
def _refresh_access_token(self):
|
||||
if not self._oauth_tokens.get(self._REFRESH_TOKEN_KEY):
|
||||
self._report_login_error('no_refresh_token')
|
||||
if self._token_is_expired(self._REFRESH_TOKEN_KEY):
|
||||
self._report_login_error('expired_refresh_token')
|
||||
|
||||
headers = {'Content-Type': 'application/json'}
|
||||
if self._is_logged_in:
|
||||
headers['Authorization'] = f'Bearer {self._oauth_tokens[self._ACCESS_TOKEN_KEY]}'
|
||||
|
||||
try:
|
||||
auth = self._download_json(
|
||||
f'{self._ACCOUNT_API_BASE}/v3/auth/token/by-credentials', None, data=json.dumps({
|
||||
'email': username,
|
||||
'otpSessionId': 'BY_PASS',
|
||||
'password': password,
|
||||
}, separators=(',', ':')).encode(), headers=headers, note='Logging in')
|
||||
response = self._download_json(
|
||||
f'{self._ACCOUNT_API_BASE}/api/v1/token/refresh', None,
|
||||
'Refreshing access token', 'Unable to refresh access token',
|
||||
headers={**self._oauth_headers, **headers},
|
||||
data=json.dumps({
|
||||
'refreshToken': self._oauth_tokens[self._REFRESH_TOKEN_KEY],
|
||||
}, separators=(',', ':')).encode())
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
raise ExtractorError('Invalid password provided', expected=True)
|
||||
self._oauth_tokens.clear()
|
||||
if self._oauth_cache_key == 'cookies':
|
||||
self.cookiejar.clear(domain='.weverse.io', path='/', name=self._ACCESS_TOKEN_KEY)
|
||||
self.cookiejar.clear(domain='.weverse.io', path='/', name=self._REFRESH_TOKEN_KEY)
|
||||
else:
|
||||
self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, self._oauth_tokens)
|
||||
self._report_login_error('expired_refresh_token')
|
||||
raise
|
||||
|
||||
WeverseBaseIE._API_HEADERS['Authorization'] = f'Bearer {auth["accessToken"]}'
|
||||
self._oauth_tokens.update(traverse_obj(response, {
|
||||
self._ACCESS_TOKEN_KEY: ('accessToken', {str}, {require('access token')}),
|
||||
self._REFRESH_TOKEN_KEY: ('refreshToken', {str}, {require('refresh token')}),
|
||||
}))
|
||||
|
||||
def _real_initialize(self):
|
||||
if self._API_HEADERS.get('Authorization'):
|
||||
if self._oauth_cache_key == 'cookies':
|
||||
self._set_cookie('.weverse.io', self._ACCESS_TOKEN_KEY, self._oauth_tokens[self._ACCESS_TOKEN_KEY])
|
||||
self._set_cookie('.weverse.io', self._REFRESH_TOKEN_KEY, self._oauth_tokens[self._REFRESH_TOKEN_KEY])
|
||||
else:
|
||||
self.cache.store(self._NETRC_MACHINE, self._oauth_cache_key, self._oauth_tokens)
|
||||
|
||||
def _get_authorization_header(self):
|
||||
if not self._is_logged_in:
|
||||
return {}
|
||||
if self._token_is_expired(self._ACCESS_TOKEN_KEY):
|
||||
self._refresh_access_token()
|
||||
return {'Authorization': f'Bearer {self._oauth_tokens[self._ACCESS_TOKEN_KEY]}'}
|
||||
|
||||
def _report_login_error(self, error_id):
|
||||
error_msg = self._LOGIN_ERRORS_MAP[error_id]
|
||||
username = self._get_login_info()[0]
|
||||
|
||||
if error_id == 'invalid_username':
|
||||
error_msg = error_msg.format(username)
|
||||
username = f'{self._OAUTH_PREFIX}+{username}'
|
||||
elif not username:
|
||||
username = f'{self._OAUTH_PREFIX}+USERNAME'
|
||||
|
||||
raise ExtractorError(join_nonempty(
|
||||
error_msg, self._LOGIN_HINT_TMPL.format(self._OAUTH_PREFIX, self._REFRESH_TOKEN_KEY, username),
|
||||
'Or else you can u', self._login_hint(method='session_cookies')[1:], delim=''), expected=True)
|
||||
|
||||
def _perform_login(self, username, password):
|
||||
if self._is_logged_in:
|
||||
return
|
||||
|
||||
token = try_call(lambda: self._get_cookies('https://weverse.io/')['we2_access_token'].value)
|
||||
if token:
|
||||
WeverseBaseIE._API_HEADERS['Authorization'] = f'Bearer {token}'
|
||||
if username.partition('+')[0] != self._OAUTH_PREFIX:
|
||||
self._report_login_error('invalid_username')
|
||||
|
||||
self._oauth_tokens.update(self.cache.load(self._NETRC_MACHINE, self._oauth_cache_key, default={}))
|
||||
if self._is_logged_in and self._access_token_is_valid():
|
||||
return
|
||||
|
||||
rt_key = self._REFRESH_TOKEN_KEY
|
||||
if not self._oauth_tokens.get(rt_key) or self._token_is_expired(rt_key):
|
||||
if try_call(lambda: jwt_decode_hs256(password)['scope']) != 'refresh':
|
||||
self._report_login_error('invalid_password')
|
||||
self._oauth_tokens[rt_key] = password
|
||||
|
||||
self._refresh_access_token()
|
||||
|
||||
def _real_initialize(self):
|
||||
cookies = self._get_cookies('https://weverse.io/')
|
||||
|
||||
if not self._device_id:
|
||||
self._device_id = traverse_obj(cookies, (self._DEVICE_ID_KEY, 'value')) or str(uuid.uuid4())
|
||||
|
||||
if self._is_logged_in:
|
||||
return
|
||||
|
||||
self._oauth_tokens.update(traverse_obj(cookies, {
|
||||
self._ACCESS_TOKEN_KEY: (self._ACCESS_TOKEN_KEY, 'value'),
|
||||
self._REFRESH_TOKEN_KEY: (self._REFRESH_TOKEN_KEY, 'value'),
|
||||
}))
|
||||
if self._is_logged_in and not self._access_token_is_valid():
|
||||
self._refresh_access_token()
|
||||
|
||||
def _call_api(self, ep, video_id, data=None, note='Downloading API JSON'):
|
||||
# Ref: https://ssl.pstatic.net/static/wevweb/2_3_2_11101725/public/static/js/2488.a09b41ff.chunk.js
|
||||
# From https://ssl.pstatic.net/static/wevweb/2_3_2_11101725/public/static/js/main.e206f7c1.js:
|
||||
key = b'1b9cb6378d959b45714bec49971ade22e6e24e42'
|
||||
api_path = update_url_query(ep, {
|
||||
# 'gcc': 'US',
|
||||
'appId': 'be4d79eb8fc7bd008ee82c8ec4ff6fd4',
|
||||
'language': 'en',
|
||||
'os': 'WEB',
|
||||
'platform': 'WEB',
|
||||
'os': self._CLIENT_PLATFORM,
|
||||
'platform': self._CLIENT_PLATFORM,
|
||||
'wpf': 'pc',
|
||||
})
|
||||
wmsgpad = int(time.time() * 1000)
|
||||
wmd = base64.b64encode(hmac.HMAC(
|
||||
key, f'{api_path[:255]}{wmsgpad}'.encode(), digestmod=hashlib.sha1).digest()).decode()
|
||||
headers = {'Content-Type': 'application/json'} if data else {}
|
||||
try:
|
||||
return self._download_json(
|
||||
f'https://global.apis.naver.com/weverse/wevweb{api_path}', video_id, note=note,
|
||||
data=data, headers={**self._API_HEADERS, **headers}, query={
|
||||
'wmsgpad': wmsgpad,
|
||||
'wmd': wmd,
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if isinstance(e.cause, HTTPError) and e.cause.status == 401:
|
||||
self.raise_login_required(
|
||||
'Session token has expired. Log in again or refresh cookies in browser')
|
||||
elif isinstance(e.cause, HTTPError) and e.cause.status == 403:
|
||||
if 'Authorization' in self._API_HEADERS:
|
||||
raise ExtractorError('Your account does not have access to this content', expected=True)
|
||||
self.raise_login_required()
|
||||
raise
|
||||
for is_retry in (False, True):
|
||||
wmsgpad = int(time.time() * 1000)
|
||||
wmd = base64.b64encode(hmac.HMAC(
|
||||
self._SIGNING_KEY, f'{api_path[:255]}{wmsgpad}'.encode(),
|
||||
digestmod=hashlib.sha1).digest()).decode()
|
||||
|
||||
try:
|
||||
return self._download_json(
|
||||
f'https://global.apis.naver.com/weverse/wevweb{api_path}', video_id, note=note,
|
||||
data=data, headers={
|
||||
**self._API_HEADERS,
|
||||
**self._get_authorization_header(),
|
||||
**({'Content-Type': 'application/json'} if data else {}),
|
||||
'WEV-device-Id': self._device_id,
|
||||
}, query={
|
||||
'wmsgpad': wmsgpad,
|
||||
'wmd': wmd,
|
||||
})
|
||||
except ExtractorError as e:
|
||||
if is_retry or not isinstance(e.cause, HTTPError):
|
||||
raise
|
||||
elif self._is_logged_in and e.cause.status == 401:
|
||||
self._refresh_access_token()
|
||||
continue
|
||||
elif e.cause.status == 403:
|
||||
if self._is_logged_in:
|
||||
raise ExtractorError(
|
||||
'Your account does not have access to this content', expected=True)
|
||||
self._report_login_error('login_required')
|
||||
raise
|
||||
|
||||
def _call_post_api(self, video_id):
|
||||
path = '' if 'Authorization' in self._API_HEADERS else '/preview'
|
||||
path = '' if self._is_logged_in else '/preview'
|
||||
return self._call_api(f'/post/v1.0/post-{video_id}{path}?fieldSet=postV1', video_id)
|
||||
|
||||
def _get_community_id(self, channel):
|
||||
|
@ -175,6 +175,15 @@ INNERTUBE_CLIENTS = {
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
|
||||
'SUPPORTS_COOKIES': True,
|
||||
},
|
||||
'tv_simply': {
|
||||
'INNERTUBE_CONTEXT': {
|
||||
'client': {
|
||||
'clientName': 'TVHTML5_SIMPLY',
|
||||
'clientVersion': '1.0',
|
||||
},
|
||||
},
|
||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 75,
|
||||
},
|
||||
# This client now requires sign-in for every video
|
||||
# It was previously an age-gate workaround for videos that were `playable_in_embed`
|
||||
# It may still be useful if signed into an EU account that is not age-verified
|
||||
|
@ -250,7 +250,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
|
||||
'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
|
||||
}
|
||||
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
|
||||
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt')
|
||||
_DEFAULT_CLIENTS = ('tv', 'ios', 'web')
|
||||
_DEFAULT_AUTHED_CLIENTS = ('tv', 'web')
|
||||
|
||||
@ -2228,21 +2228,21 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
def _extract_n_function_name(self, jscode, player_url=None):
|
||||
varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
|
||||
if debug_str := traverse_obj(global_list, (lambda _, v: v.endswith('_w8_'), any)):
|
||||
funcname = self._search_regex(
|
||||
r'''(?xs)
|
||||
[;\n](?:
|
||||
(?P<f>function\s+)|
|
||||
(?:var\s+)?
|
||||
)(?P<funcname>[a-zA-Z0-9_$]+)\s*(?(f)|=\s*function\s*)
|
||||
\((?P<argname>[a-zA-Z0-9_$]+)\)\s*\{
|
||||
(?:(?!\}[;\n]).)+
|
||||
\}\s*catch\(\s*[a-zA-Z0-9_$]+\s*\)\s*
|
||||
\{\s*return\s+%s\[%d\]\s*\+\s*(?P=argname)\s*\}\s*return\s+[^}]+\}[;\n]
|
||||
''' % (re.escape(varname), global_list.index(debug_str)),
|
||||
jscode, 'nsig function name', group='funcname', default=None)
|
||||
if funcname:
|
||||
return funcname
|
||||
if debug_str := traverse_obj(global_list, (lambda _, v: v.endswith('-_w8_'), any)):
|
||||
pattern = r'''(?x)
|
||||
\{\s*return\s+%s\[%d\]\s*\+\s*(?P<argname>[a-zA-Z0-9_$]+)\s*\}
|
||||
''' % (re.escape(varname), global_list.index(debug_str))
|
||||
if match := re.search(pattern, jscode):
|
||||
pattern = r'''(?x)
|
||||
\{\s*\)%s\(\s*
|
||||
(?:
|
||||
(?P<funcname_a>[a-zA-Z0-9_$]+)\s*noitcnuf\s*
|
||||
|noitcnuf\s*=\s*(?P<funcname_b>[a-zA-Z0-9_$]+)(?:\s+rav)?
|
||||
)[;\n]
|
||||
''' % re.escape(match.group('argname')[::-1])
|
||||
if match := re.search(pattern, jscode[match.start()::-1]):
|
||||
a, b = match.group('funcname_a', 'funcname_b')
|
||||
return (a or b)[::-1]
|
||||
self.write_debug(join_nonempty(
|
||||
'Initial search was unable to find nsig function name',
|
||||
player_url and f' player = {player_url}', delim='\n'), only_once=True)
|
||||
@ -2289,8 +2289,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
|
||||
f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
|
||||
|
||||
def _extract_player_js_global_var(self, jscode, player_url):
|
||||
"""Returns tuple of strings: variable assignment code, variable name, variable value code"""
|
||||
def _interpret_player_js_global_var(self, jscode, player_url):
|
||||
"""Returns tuple of: variable name string, variable value list"""
|
||||
extract_global_var = self._cached(self._search_regex, 'js global array', player_url)
|
||||
varcode, varname, varvalue = extract_global_var(
|
||||
r'''(?x)
|
||||
@ -2308,27 +2308,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
self.write_debug(join_nonempty(
|
||||
'No global array variable found in player JS',
|
||||
player_url and f' player = {player_url}', delim='\n'), only_once=True)
|
||||
return varcode, varname, varvalue
|
||||
return None, None
|
||||
|
||||
def _interpret_player_js_global_var(self, jscode, player_url):
|
||||
"""Returns tuple of: variable name string, variable value list"""
|
||||
_, varname, array_code = self._extract_player_js_global_var(jscode, player_url)
|
||||
jsi = JSInterpreter(array_code)
|
||||
jsi = JSInterpreter(varcode)
|
||||
interpret_global_var = self._cached(jsi.interpret_expression, 'js global list', player_url)
|
||||
return varname, interpret_global_var(array_code, {}, allow_recursion=10)
|
||||
return varname, interpret_global_var(varvalue, {}, allow_recursion=10)
|
||||
|
||||
def _fixup_n_function_code(self, argnames, nsig_code, jscode, player_url):
|
||||
varcode, varname, _ = self._extract_player_js_global_var(jscode, player_url)
|
||||
if varcode and varname:
|
||||
nsig_code = varcode + '; ' + nsig_code
|
||||
_, global_list = self._interpret_player_js_global_var(jscode, player_url)
|
||||
varname, global_list = self._interpret_player_js_global_var(jscode, player_url)
|
||||
if varname and global_list:
|
||||
nsig_code = f'var {varname}={json.dumps(global_list)}; {nsig_code}'
|
||||
else:
|
||||
varname = 'dlp_wins'
|
||||
global_list = []
|
||||
|
||||
undefined_idx = global_list.index('undefined') if 'undefined' in global_list else r'\d+'
|
||||
fixed_code = re.sub(
|
||||
rf'''(?x)
|
||||
fr'''(?x)
|
||||
;\s*if\s*\(\s*typeof\s+[a-zA-Z0-9_$]+\s*===?\s*(?:
|
||||
(["\'])undefined\1|
|
||||
{re.escape(varname)}\[{undefined_idx}\]
|
||||
@ -2402,6 +2398,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
return sts
|
||||
|
||||
def _mark_watched(self, video_id, player_responses):
|
||||
# cpn generation algorithm is reverse engineered from base.js.
|
||||
# In fact it works even with dummy cpn.
|
||||
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
|
||||
cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))
|
||||
|
||||
for is_full, key in enumerate(('videostatsPlaybackUrl', 'videostatsWatchtimeUrl')):
|
||||
label = 'fully ' if is_full else ''
|
||||
url = get_first(player_responses, ('playbackTracking', key, 'baseUrl'),
|
||||
@ -2412,11 +2413,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
parsed_url = urllib.parse.urlparse(url)
|
||||
qs = urllib.parse.parse_qs(parsed_url.query)
|
||||
|
||||
# cpn generation algorithm is reverse engineered from base.js.
|
||||
# In fact it works even with dummy cpn.
|
||||
CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_'
|
||||
cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16))
|
||||
|
||||
# # more consistent results setting it to right before the end
|
||||
video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)]
|
||||
|
||||
@ -3402,8 +3398,15 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
self._decrypt_signature(encrypted_sig, video_id, player_url),
|
||||
)
|
||||
except ExtractorError as e:
|
||||
self.report_warning('Signature extraction failed: Some formats may be missing',
|
||||
video_id=video_id, only_once=True)
|
||||
self.report_warning(
|
||||
f'Signature extraction failed: Some formats may be missing\n'
|
||||
f' player = {player_url}\n'
|
||||
f' {bug_reports_message(before="")}',
|
||||
video_id=video_id, only_once=True)
|
||||
self.write_debug(
|
||||
f'{video_id}: Signature extraction failure info:\n'
|
||||
f' encrypted sig = {encrypted_sig}\n'
|
||||
f' player = {player_url}')
|
||||
self.write_debug(e, only_once=True)
|
||||
continue
|
||||
|
||||
@ -3950,19 +3953,23 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
||||
|
||||
subtitles = {}
|
||||
skipped_subs_clients = set()
|
||||
prs = traverse_obj(player_responses, (
|
||||
# Filter out initial_pr which does not have streamingData (smuggled client context)
|
||||
lambda _, v: v['streamingData'] and v['captions']['playerCaptionsTracklistRenderer']))
|
||||
|
||||
pctrs = traverse_obj(prs, (..., 'captions', 'playerCaptionsTracklistRenderer', {dict}))
|
||||
# Only web/mweb clients provide translationLanguages, so include initial_pr in the traversal
|
||||
translation_languages = {
|
||||
lang.get('languageCode'): self._get_text(lang.get('languageName'), max_runs=1)
|
||||
for lang in traverse_obj(pctrs, (..., 'translationLanguages', ..., {dict}))}
|
||||
lang['languageCode']: self._get_text(lang['languageName'], max_runs=1)
|
||||
for lang in traverse_obj(player_responses, (
|
||||
..., 'captions', 'playerCaptionsTracklistRenderer', 'translationLanguages',
|
||||
lambda _, v: v['languageCode'] and v['languageName']))
|
||||
}
|
||||
# NB: Constructing the full subtitle dictionary is slow
|
||||
get_translated_subs = 'translated_subs' not in self._configuration_arg('skip') and (
|
||||
self.get_param('writeautomaticsub', False) or self.get_param('listsubtitles'))
|
||||
|
||||
all_captions = traverse_obj(pctrs, (..., 'captionTracks', ..., {dict}))
|
||||
# Filter out initial_pr which does not have streamingData (smuggled client context)
|
||||
prs = traverse_obj(player_responses, (
|
||||
lambda _, v: v['streamingData'] and v['captions']['playerCaptionsTracklistRenderer']))
|
||||
all_captions = traverse_obj(prs, (
|
||||
..., 'captions', 'playerCaptionsTracklistRenderer', 'captionTracks', ..., {dict}))
|
||||
need_subs_langs = {get_lang_code(sub) for sub in all_captions if sub.get('kind') != 'asr'}
|
||||
need_caps_langs = {
|
||||
remove_start(get_lang_code(sub), 'a-')
|
||||
|
@ -20,6 +20,7 @@ WEBPO_CLIENTS = (
|
||||
'WEB_EMBEDDED_PLAYER',
|
||||
'WEB_CREATOR',
|
||||
'WEB_REMIX',
|
||||
'TVHTML5_SIMPLY',
|
||||
'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
|
||||
)
|
||||
|
||||
|
@ -6,6 +6,7 @@ import time
|
||||
from .common import InfoExtractor
|
||||
from ..utils import (
|
||||
ExtractorError,
|
||||
ISO639Utils,
|
||||
determine_ext,
|
||||
filter_dict,
|
||||
float_or_none,
|
||||
@ -118,10 +119,7 @@ class ZDFBaseIE(InfoExtractor):
|
||||
if ext == 'm3u8':
|
||||
fmts = self._extract_m3u8_formats(
|
||||
format_url, video_id, 'mp4', m3u8_id='hls', fatal=False)
|
||||
elif ext == 'mpd':
|
||||
fmts = self._extract_mpd_formats(
|
||||
format_url, video_id, mpd_id='dash', fatal=False)
|
||||
else:
|
||||
elif ext in ('mp4', 'webm'):
|
||||
height = int_or_none(quality.get('highestVerticalResolution'))
|
||||
width = round(aspect_ratio * height) if aspect_ratio and height else None
|
||||
fmts = [{
|
||||
@ -132,16 +130,31 @@ class ZDFBaseIE(InfoExtractor):
|
||||
'format_id': join_nonempty('http', stream.get('type')),
|
||||
'tbr': int_or_none(self._search_regex(r'_(\d+)k_', format_url, 'tbr', default=None)),
|
||||
}]
|
||||
else:
|
||||
self.report_warning(f'Skipping unsupported extension "{ext}"', video_id=video_id)
|
||||
fmts = []
|
||||
|
||||
f_class = variant.get('class')
|
||||
for f in fmts:
|
||||
f_lang = ISO639Utils.short2long(
|
||||
(f.get('language') or variant.get('language') or '').lower())
|
||||
is_audio_only = f.get('vcodec') == 'none'
|
||||
formats.append({
|
||||
**f,
|
||||
'format_id': join_nonempty(f.get('format_id'), is_dgs and 'dgs'),
|
||||
'format_id': join_nonempty(f['format_id'], is_dgs and 'dgs'),
|
||||
'format_note': join_nonempty(
|
||||
f_class, is_dgs and 'German Sign Language', f.get('format_note'), delim=', '),
|
||||
'language': variant.get('language') or f.get('language'),
|
||||
not is_audio_only and f_class,
|
||||
is_dgs and 'German Sign Language',
|
||||
f.get('format_note'), delim=', '),
|
||||
'preference': -2 if is_dgs else -1,
|
||||
'language_preference': 10 if f_class == 'main' else -10 if f_class == 'ad' else -1,
|
||||
'language': f_lang,
|
||||
'language_preference': (
|
||||
-10 if ((is_audio_only and f.get('format_note') == 'Audiodeskription')
|
||||
or (not is_audio_only and f_class == 'ad'))
|
||||
else 10 if f_lang == 'deu' and f_class == 'main'
|
||||
else 5 if f_lang == 'deu'
|
||||
else 1 if f_class == 'main'
|
||||
else -1),
|
||||
})
|
||||
|
||||
return {
|
||||
@ -333,12 +346,13 @@ class ZDFIE(ZDFBaseIE):
|
||||
'title': 'Dobrindt schließt Steuererhöhungen aus',
|
||||
'description': 'md5:9a117646d7b8df6bc902eb543a9c9023',
|
||||
'duration': 325,
|
||||
'thumbnail': 'https://www.zdf.de/assets/dobrindt-csu-berlin-direkt-100~1920x1080?cb=1743357653736',
|
||||
'thumbnail': 'https://www.zdfheute.de/assets/dobrindt-csu-berlin-direkt-100~1920x1080?cb=1743357653736',
|
||||
'timestamp': 1743374520,
|
||||
'upload_date': '20250330',
|
||||
'_old_archive_ids': ['zdf 250330_clip_2_bdi'],
|
||||
},
|
||||
}, {
|
||||
# FUNK video (hosted on a different CDN, has atypical PTMD and HLS files)
|
||||
'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
|
||||
'md5': '57af4423db0455a3975d2dc4578536bc',
|
||||
'info_dict': {
|
||||
@ -651,6 +665,7 @@ class ZDFChannelIE(ZDFBaseIE):
|
||||
'description': 'md5:6edad39189abf8431795d3d6d7f986b3',
|
||||
},
|
||||
'playlist_count': 242,
|
||||
'skip': 'Video count changes daily, needs support for playlist_maxcount',
|
||||
}]
|
||||
|
||||
_PAGE_SIZE = 24
|
||||
|
@ -1,8 +1,8 @@
|
||||
# Autogenerated by devscripts/update-version.py
|
||||
|
||||
__version__ = '2025.05.22'
|
||||
__version__ = '2025.06.09'
|
||||
|
||||
RELEASE_GIT_HEAD = '7977b329ed97b216e37bd402f4935f28c00eac9e'
|
||||
RELEASE_GIT_HEAD = '339614a173c74b42d63e858c446a9cae262a13af'
|
||||
|
||||
VARIANT = None
|
||||
|
||||
@ -12,4 +12,4 @@ CHANNEL = 'stable'
|
||||
|
||||
ORIGIN = 'yt-dlp/yt-dlp'
|
||||
|
||||
_pkg_version = '2025.05.22'
|
||||
_pkg_version = '2025.06.09'
|
||||
|
Loading…
x
Reference in New Issue
Block a user