mirror of
https://github.com/yt-dlp/yt-dlp.git
synced 2025-06-19 00:05:29 +02:00
Compare commits
4 Commits
f37d599a69
...
03dba2012d
Author | SHA1 | Date | |
---|---|---|---|
![]() |
03dba2012d | ||
![]() |
5d96527be8 | ||
![]() |
1fd0e88b67 | ||
![]() |
231349786e |
@ -1797,7 +1797,7 @@ The following extractors use this feature:
|
|||||||
#### youtube
|
#### youtube
|
||||||
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
* `lang`: Prefer translated metadata (`title`, `description` etc) of this language code (case-sensitive). By default, the video primary language metadata is preferred, with a fallback to `en` translated. See [youtube.py](https://github.com/yt-dlp/yt-dlp/blob/c26f9b991a0681fd3ea548d535919cec1fbbd430/yt_dlp/extractor/youtube.py#L381-L390) for list of supported content language codes
|
||||||
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
* `skip`: One or more of `hls`, `dash` or `translated_subs` to skip extraction of the m3u8 manifests, dash manifests and [auto-translated subtitles](https://github.com/yt-dlp/yt-dlp/issues/4090#issuecomment-1158102032) respectively
|
||||||
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
|
* `player_client`: Clients to extract video data from. The currently available clients are `web`, `web_safari`, `web_embedded`, `web_music`, `web_creator`, `mweb`, `ios`, `android`, `android_vr`, `tv`, `tv_simply` and `tv_embedded`. By default, `tv,ios,web` is used, or `tv,web` is used when authenticating with cookies. The `web_music` client is added for `music.youtube.com` URLs when logged-in cookies are used. The `web_embedded` client is added for age-restricted videos but only works if the video is embeddable. The `tv_embedded` and `web_creator` clients are added for age-restricted videos if account age-verification is required. Some clients, such as `web` and `web_music`, require a `po_token` for their formats to be downloadable. Some clients, such as `web_creator`, will only work with authentication. Not all clients support authentication via cookies. You can use `default` for the default clients, or you can use `all` for all clients (not recommended). You can prefix a client with `-` to exclude it, e.g. `youtube:player_client=default,-ios`
|
||||||
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
|
* `player_skip`: Skip some network requests that are generally needed for robust extraction. One or more of `configs` (skip client configs), `webpage` (skip initial webpage), `js` (skip js player), `initial_data` (skip initial data/next ep request). While these options can help reduce the number of requests needed or avoid some rate-limiting, they could cause issues such as missing formats or metadata. See [#860](https://github.com/yt-dlp/yt-dlp/pull/860) and [#12826](https://github.com/yt-dlp/yt-dlp/issues/12826) for more details
|
||||||
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
* `player_params`: YouTube player parameters to use for player requests. Will overwrite any default ones set by yt-dlp.
|
||||||
* `player_js_variant`: The player javascript variant to use for signature and nsig deciphering. The known variants are: `main`, `tce`, `tv`, `tv_es6`, `phone`, `tablet`. Only `main` is recommended as a possible workaround; the others are for debugging purposes. The default is to use what is prescribed by the site, and can be selected with `actual`
|
* `player_js_variant`: The player javascript variant to use for signature and nsig deciphering. The known variants are: `main`, `tce`, `tv`, `tv_es6`, `phone`, `tablet`. Only `main` is recommended as a possible workaround; the others are for debugging purposes. The default is to use what is prescribed by the site, and can be selected with `actual`
|
||||||
|
@ -11,7 +11,7 @@ class TestGetWebPoContentBinding:
|
|||||||
|
|
||||||
@pytest.mark.parametrize('client_name, context, is_authenticated, expected', [
|
@pytest.mark.parametrize('client_name, context, is_authenticated, expected', [
|
||||||
*[(client, context, is_authenticated, expected) for client in [
|
*[(client, context, is_authenticated, expected) for client in [
|
||||||
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
|
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'TVHTML5_SIMPLY']
|
||||||
for context, is_authenticated, expected in [
|
for context, is_authenticated, expected in [
|
||||||
(PoTokenContext.GVS, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)),
|
(PoTokenContext.GVS, False, ('example-visitor-data', ContentBindingType.VISITOR_DATA)),
|
||||||
(PoTokenContext.PLAYER, False, ('example-video-id', ContentBindingType.VIDEO_ID)),
|
(PoTokenContext.PLAYER, False, ('example-video-id', ContentBindingType.VIDEO_ID)),
|
||||||
|
@ -49,7 +49,7 @@ class TestWebPoPCSP:
|
|||||||
|
|
||||||
@pytest.mark.parametrize('client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected', [
|
@pytest.mark.parametrize('client_name, context, is_authenticated, remote_host, source_address, request_proxy, expected', [
|
||||||
*[(client, context, is_authenticated, remote_host, source_address, request_proxy, expected) for client in [
|
*[(client, context, is_authenticated, remote_host, source_address, request_proxy, expected) for client in [
|
||||||
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER']
|
'WEB', 'MWEB', 'TVHTML5', 'WEB_EMBEDDED_PLAYER', 'WEB_CREATOR', 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', 'TVHTML5_SIMPLY']
|
||||||
for context, is_authenticated, remote_host, source_address, request_proxy, expected in [
|
for context, is_authenticated, remote_host, source_address, request_proxy, expected in [
|
||||||
(PoTokenContext.GVS, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}),
|
(PoTokenContext.GVS, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'visitor_id'}),
|
||||||
(PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'video_id'}),
|
(PoTokenContext.PLAYER, False, 'example-remote-host', 'example-source-address', 'example-request-proxy', {'t': 'webpo', 'ip': 'example-remote-host', 'sa': 'example-source-address', 'px': 'example-request-proxy', 'cb': '123abcXYZ_-', 'cbt': 'video_id'}),
|
||||||
|
@ -1,7 +1,5 @@
|
|||||||
from .telecinco import TelecincoBaseIE
|
from .telecinco import TelecincoBaseIE
|
||||||
from ..networking.exceptions import HTTPError
|
|
||||||
from ..utils import (
|
from ..utils import (
|
||||||
ExtractorError,
|
|
||||||
int_or_none,
|
int_or_none,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
)
|
)
|
||||||
@ -81,17 +79,7 @@ class MiTeleIE(TelecincoBaseIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
|
webpage = self._download_akamai_webpage(url, display_id)
|
||||||
try: # yt-dlp's default user-agents are too old and blocked by akamai
|
|
||||||
webpage = self._download_webpage(url, display_id, headers={
|
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
|
|
||||||
})
|
|
||||||
except ExtractorError as e:
|
|
||||||
if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
|
|
||||||
raise
|
|
||||||
# Retry with impersonation if hardcoded UA is insufficient to bypass akamai
|
|
||||||
webpage = self._download_webpage(url, display_id, impersonate=True)
|
|
||||||
|
|
||||||
pre_player = self._search_json(
|
pre_player = self._search_json(
|
||||||
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
|
r'window\.\$REACTBASE_STATE\.prePlayer_mtweb\s*=',
|
||||||
webpage, 'Pre Player', display_id)['prePlayer']
|
webpage, 'Pre Player', display_id)['prePlayer']
|
||||||
|
@ -4,6 +4,7 @@ from .wrestleuniverse import WrestleUniverseBaseIE
|
|||||||
from ..utils import (
|
from ..utils import (
|
||||||
int_or_none,
|
int_or_none,
|
||||||
traverse_obj,
|
traverse_obj,
|
||||||
|
url_basename,
|
||||||
url_or_none,
|
url_or_none,
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -65,9 +66,19 @@ class StacommuBaseIE(WrestleUniverseBaseIE):
|
|||||||
hls_info, decrypt = self._call_encrypted_api(
|
hls_info, decrypt = self._call_encrypted_api(
|
||||||
video_id, ':watchArchive', 'stream information', data={'method': 1})
|
video_id, ':watchArchive', 'stream information', data={'method': 1})
|
||||||
|
|
||||||
|
formats = self._get_formats(hls_info, ('hls', 'urls', ..., {url_or_none}), video_id)
|
||||||
|
for f in formats:
|
||||||
|
# bitrates are exaggerated in PPV playlists, so avoid wrong/huge filesize_approx values
|
||||||
|
if f.get('tbr'):
|
||||||
|
f['tbr'] = int(f['tbr'] / 2.5)
|
||||||
|
# prefer variants with the same basename as the master playlist to avoid partial streams
|
||||||
|
f['format_id'] = url_basename(f['url']).partition('.')[0]
|
||||||
|
if not f['format_id'].startswith(url_basename(f['manifest_url']).partition('.')[0]):
|
||||||
|
f['preference'] = -10
|
||||||
|
|
||||||
return {
|
return {
|
||||||
'id': video_id,
|
'id': video_id,
|
||||||
'formats': self._get_formats(hls_info, ('hls', 'urls', ..., {url_or_none}), video_id),
|
'formats': formats,
|
||||||
'hls_aes': self._extract_hls_key(hls_info, 'hls', decrypt),
|
'hls_aes': self._extract_hls_key(hls_info, 'hls', decrypt),
|
||||||
**traverse_obj(video_info, {
|
**traverse_obj(video_info, {
|
||||||
'title': ('displayName', {str}),
|
'title': ('displayName', {str}),
|
||||||
|
@ -63,6 +63,17 @@ class TelecincoBaseIE(InfoExtractor):
|
|||||||
'http_headers': headers,
|
'http_headers': headers,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _download_akamai_webpage(self, url, display_id):
|
||||||
|
try: # yt-dlp's default user-agents are too old and blocked by akamai
|
||||||
|
return self._download_webpage(url, display_id, headers={
|
||||||
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; rv:136.0) Gecko/20100101 Firefox/136.0',
|
||||||
|
})
|
||||||
|
except ExtractorError as e:
|
||||||
|
if not isinstance(e.cause, HTTPError) or e.cause.status != 403:
|
||||||
|
raise
|
||||||
|
# Retry with impersonation if hardcoded UA is insufficient to bypass akamai
|
||||||
|
return self._download_webpage(url, display_id, impersonate=True)
|
||||||
|
|
||||||
|
|
||||||
class TelecincoIE(TelecincoBaseIE):
|
class TelecincoIE(TelecincoBaseIE):
|
||||||
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
IE_DESC = 'telecinco.es, cuatro.com and mediaset.es'
|
||||||
@ -140,7 +151,7 @@ class TelecincoIE(TelecincoBaseIE):
|
|||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
display_id = self._match_id(url)
|
display_id = self._match_id(url)
|
||||||
webpage = self._download_webpage(url, display_id)
|
webpage = self._download_akamai_webpage(url, display_id)
|
||||||
article = self._search_json(
|
article = self._search_json(
|
||||||
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
|
r'window\.\$REACTBASE_STATE\.article(?:_multisite)?\s*=',
|
||||||
webpage, 'article', display_id)['article']
|
webpage, 'article', display_id)['article']
|
||||||
|
@ -175,6 +175,15 @@ INNERTUBE_CLIENTS = {
|
|||||||
'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
|
'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
|
||||||
'SUPPORTS_COOKIES': True,
|
'SUPPORTS_COOKIES': True,
|
||||||
},
|
},
|
||||||
|
'tv_simply': {
|
||||||
|
'INNERTUBE_CONTEXT': {
|
||||||
|
'client': {
|
||||||
|
'clientName': 'TVHTML5_SIMPLY',
|
||||||
|
'clientVersion': '1.0',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
'INNERTUBE_CONTEXT_CLIENT_NAME': 75,
|
||||||
|
},
|
||||||
# This client now requires sign-in for every video
|
# This client now requires sign-in for every video
|
||||||
# It was previously an age-gate workaround for videos that were `playable_in_embed`
|
# It was previously an age-gate workaround for videos that were `playable_in_embed`
|
||||||
# It may still be useful if signed into an EU account that is not age-verified
|
# It may still be useful if signed into an EU account that is not age-verified
|
||||||
|
@ -250,7 +250,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|||||||
'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
|
'400': {'ext': 'mp4', 'height': 1440, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
|
||||||
'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
|
'401': {'ext': 'mp4', 'height': 2160, 'format_note': 'DASH video', 'vcodec': 'av01.0.12M.08'},
|
||||||
}
|
}
|
||||||
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
|
_SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'srt', 'vtt')
|
||||||
_DEFAULT_CLIENTS = ('tv', 'ios', 'web')
|
_DEFAULT_CLIENTS = ('tv', 'ios', 'web')
|
||||||
_DEFAULT_AUTHED_CLIENTS = ('tv', 'web')
|
_DEFAULT_AUTHED_CLIENTS = ('tv', 'web')
|
||||||
|
|
||||||
|
@ -20,6 +20,7 @@ WEBPO_CLIENTS = (
|
|||||||
'WEB_EMBEDDED_PLAYER',
|
'WEB_EMBEDDED_PLAYER',
|
||||||
'WEB_CREATOR',
|
'WEB_CREATOR',
|
||||||
'WEB_REMIX',
|
'WEB_REMIX',
|
||||||
|
'TVHTML5_SIMPLY',
|
||||||
'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
|
'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
|
||||||
)
|
)
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user