class FloatplaneBaseIE(InfoExtractor):
    """Shared post extractor for sites that expose the Floatplane web API.

    Subclasses must define:
        _VALID_URL:          URL pattern with an ``id`` group holding the post ID.
        _BASE_URL:           Site origin without trailing slash; every API path
                             is appended to it.
        _IMPERSONATE_TARGET: Forwarded as ``impersonate=`` to every request.
                             Any value other than ``None`` additionally makes
                             the extractor download each media playlist and
                             its AES-128 key up front.
        _HEADERS:            HTTP headers sent with the playlist/key requests
                             and returned as ``http_headers``.
    """

    def _real_extract(self, url):
        """Extract all video/audio attachments of a post.

        Returns a single info dict when the post has exactly one attachment,
        otherwise a playlist whose entries are the attachments.

        Raises ExtractorError if the post advertises no video/audio track or
        if it yields no media attachment at all.
        """
        post_id = self._match_id(url)

        post_data = self._download_json(
            f'{self._BASE_URL}/api/v3/content/post', post_id, query={'id': post_id},
            note='Downloading post data', errnote='Unable to download post data',
            impersonate=self._IMPERSONATE_TARGET)

        if not any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))):
            raise ExtractorError('Post does not contain a video or audio track', expected=True)

        uploader_url = format_field(
            post_data, [('creator', 'urlname')], f'{self._BASE_URL}/channel/%s/home') or None

        # Fields shared by the post itself and by each of its attachments
        common_info = {
            'uploader_url': uploader_url,
            'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))),
            'availability': self._availability(needs_subscription=True),
            **traverse_obj(post_data, {
                'uploader': ('creator', 'title', {str}),
                'uploader_id': ('creator', 'id', {str}),
                'channel': ('channel', 'title', {str}),
                'channel_id': ('channel', 'id', {str}),
                'release_timestamp': ('releaseDate', {parse_iso8601}),
            }),
        }

        items = []
        for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)):
            media_id = media['id']
            media_typ = media.get('type') or 'video'

            metadata = self._download_json(
                f'{self._BASE_URL}/api/v3/content/{media_typ}', media_id, query={'id': media_id},
                note=f'Downloading {media_typ} metadata', impersonate=self._IMPERSONATE_TARGET)

            stream = self._download_json(
                f'{self._BASE_URL}/api/v2/cdn/delivery', media_id, query={
                    'type': 'vod' if media_typ == 'video' else 'aod',
                    'guid': metadata['guid'],
                }, note=f'Downloading {media_typ} stream data',
                impersonate=self._IMPERSONATE_TARGET)

            path_template = traverse_obj(stream, ('resource', 'uri', {str}))

            def format_path(params):
                # Fill the '{qualityLevelParams.<name>}' placeholders of the URI template
                if not path_template:
                    return None
                path = path_template
                for i, val in (params or {}).items():
                    path = path.replace(f'{{qualityLevelParams.{i}}}', str(val))
                return path

            formats = []
            for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)):
                url = urljoin(stream['cdn'], format_path(traverse_obj(
                    stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict}))))
                if not url:
                    # No usable URI template for this quality level; nothing to download
                    continue
                format_id = traverse_obj(quality, ('name', {str}))
                hls_aes = {}
                m3u8_data = None

                # If we need impersonation for the API, then we need it for HLS keys too: extract in advance
                if self._IMPERSONATE_TARGET is not None:
                    m3u8_data = self._download_webpage(
                        url, media_id, fatal=False, impersonate=self._IMPERSONATE_TARGET, headers=self._HEADERS,
                        note=join_nonempty('Downloading', format_id, 'm3u8 information', delim=' '),
                        errnote=join_nonempty('Failed to download', format_id, 'm3u8 information', delim=' '))
                    if not m3u8_data:
                        continue

                    key_url = self._search_regex(
                        r'#EXT-X-KEY:METHOD=AES-128,URI="(https?://[^"]+)"',
                        m3u8_data, 'HLS AES key URI', default=None)
                    if key_url:
                        urlh = self._request_webpage(
                            key_url, media_id, fatal=False, impersonate=self._IMPERSONATE_TARGET, headers=self._HEADERS,
                            note=join_nonempty('Downloading', format_id, 'HLS AES key', delim=' '),
                            errnote=join_nonempty('Failed to download', format_id, 'HLS AES key', delim=' '))
                        if urlh:
                            hls_aes['key'] = urlh.read().hex()

                formats.append({
                    **traverse_obj(quality, {
                        'format_note': ('label', {str}),
                        'width': ('width', {int}),
                        'height': ('height', {int}),
                    }),
                    **parse_codecs(quality.get('codecs')),
                    'url': url,
                    'ext': determine_ext(url.partition('/chunk.m3u8')[0], 'mp4'),
                    'format_id': format_id,
                    'hls_media_playlist_data': m3u8_data,
                    'hls_aes': hls_aes or None,
                })

            items.append({
                **common_info,
                'id': media_id,
                **traverse_obj(metadata, {
                    'title': ('title', {str}),
                    'duration': ('duration', {int_or_none}),
                    'thumbnail': ('thumbnail', 'path', {url_or_none}),
                }),
                'formats': formats,
                # Set per entry so that playlist entries carry the headers too,
                # not only the single-attachment result
                'http_headers': self._HEADERS,
            })

        if not items:
            # The post flags say there is media, but no attachment was listed
            raise ExtractorError('Post does not contain any media attachments', expected=True)

        post_info = {
            **common_info,
            'id': post_id,
            'display_id': post_id,
            **traverse_obj(post_data, {
                'title': ('title', {str}),
                'description': ('text', {clean_html}),
                'like_count': ('likes', {int_or_none}),
                'dislike_count': ('dislikes', {int_or_none}),
                'comment_count': ('comments', {int_or_none}),
                'thumbnail': ('thumbnail', 'path', {url_or_none}),
            }),
            'http_headers': self._HEADERS,
        }

        if len(items) > 1:
            return self.playlist_result(items, **post_info)

        # Single attachment: its own fields (id, title, formats, ...) take precedence
        post_info.update(items[0])
        return post_info
any(traverse_obj(post_data, ('metadata', ('hasVideo', 'hasAudio')))): - raise ExtractorError('Post does not contain a video or audio track', expected=True) - - uploader_url = format_field( - post_data, [('creator', 'urlname')], 'https://www.floatplane.com/channel/%s/home') or None - - common_info = { - 'uploader_url': uploader_url, - 'channel_url': urljoin(f'{uploader_url}/', traverse_obj(post_data, ('channel', 'urlname'))), - 'availability': self._availability(needs_subscription=True), - **traverse_obj(post_data, { - 'uploader': ('creator', 'title', {str}), - 'uploader_id': ('creator', 'id', {str}), - 'channel': ('channel', 'title', {str}), - 'channel_id': ('channel', 'id', {str}), - 'release_timestamp': ('releaseDate', {parse_iso8601}), - }), - } - - items = [] - for media in traverse_obj(post_data, (('videoAttachments', 'audioAttachments'), ...)): - media_id = media['id'] - media_typ = media.get('type') or 'video' - - metadata = self._download_json( - f'https://www.floatplane.com/api/v3/content/{media_typ}', media_id, query={'id': media_id}, - note=f'Downloading {media_typ} metadata') - - stream = self._download_json( - 'https://www.floatplane.com/api/v2/cdn/delivery', media_id, query={ - 'type': 'vod' if media_typ == 'video' else 'aod', - 'guid': metadata['guid'], - }, note=f'Downloading {media_typ} stream data') - - path_template = traverse_obj(stream, ('resource', 'uri', {str})) - - def format_path(params): - path = path_template - for i, val in (params or {}).items(): - path = path.replace(f'{{qualityLevelParams.{i}}}', val) - return path - - formats = [] - for quality in traverse_obj(stream, ('resource', 'data', 'qualityLevels', ...)): - url = urljoin(stream['cdn'], format_path(traverse_obj( - stream, ('resource', 'data', 'qualityLevelParams', quality['name'], {dict})))) - formats.append({ - **traverse_obj(quality, { - 'format_id': ('name', {str}), - 'format_note': ('label', {str}), - 'width': ('width', {int}), - 'height': ('height', {int}), - }), - 
**parse_codecs(quality.get('codecs')), - 'url': url, - 'ext': determine_ext(url.partition('/chunk.m3u8')[0], 'mp4'), - }) - - items.append({ - **common_info, - 'id': media_id, - **traverse_obj(metadata, { - 'title': ('title', {str}), - 'duration': ('duration', {int_or_none}), - 'thumbnail': ('thumbnail', 'path', {url_or_none}), - }), - 'formats': formats, - }) - - post_info = { - **common_info, - 'id': post_id, - 'display_id': post_id, - **traverse_obj(post_data, { - 'title': ('title', {str}), - 'description': ('text', {clean_html}), - 'like_count': ('likes', {int_or_none}), - 'dislike_count': ('dislikes', {int_or_none}), - 'comment_count': ('comments', {int_or_none}), - 'thumbnail': ('thumbnail', 'path', {url_or_none}), - }), - } - - if len(items) > 1: - return self.playlist_result(items, **post_info) - - post_info.update(items[0]) - return post_info - class FloatplaneChannelIE(InfoExtractor): _VALID_URL = r'https?://(?:(?:www|beta)\.)?floatplane\.com/channel/(?P[\w-]+)/home(?:/(?P[\w-]+))?' 
from .floatplane import FloatplaneBaseIE


class SaucePlusIE(FloatplaneBaseIE):
    """Extractor for Sauce+ posts; extraction itself is inherited from FloatplaneBaseIE."""

    IE_DESC = 'Sauce+'
    # The named group must be called "id": it is what _match_id() reads
    _VALID_URL = r'https?://(?:(?:www|beta)\.)?sauceplus\.com/post/(?P<id>\w+)'
    _BASE_URL = 'https://www.sauceplus.com'
    _HEADERS = {
        'Origin': _BASE_URL,
        'Referer': f'{_BASE_URL}/',
    }
    # Not None, so the base extractor impersonates on every request and
    # downloads the HLS playlists and AES keys in advance
    _IMPERSONATE_TARGET = True
    _TESTS = [{
        'url': 'https://www.sauceplus.com/post/YbBwIa2A5g',
        'info_dict': {
            'id': 'eit4Ugu5TL',
            'ext': 'mp4',
            'display_id': 'YbBwIa2A5g',
            'title': 'Scare the Coyote - Episode 3',
            'description': '',
            'thumbnail': r're:^https?://.*\.jpe?g$',
            'duration': 2975,
            'comment_count': int,
            'like_count': int,
            'dislike_count': int,
            'release_date': '20250627',
            'release_timestamp': 1750993500,
            'uploader': 'Scare The Coyote',
            'uploader_id': '683e0a3269688656a5a49a44',
            'uploader_url': 'https://www.sauceplus.com/channel/ScareTheCoyote/home',
            'channel': 'Scare The Coyote',
            'channel_id': '683e0a326968866ceba49a45',
            'channel_url': 'https://www.sauceplus.com/channel/ScareTheCoyote/home/main',
            'availability': 'subscriber_only',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_initialize(self):
        """Require the Sauce+ session cookie before any extraction is attempted."""
        if not self._get_cookies(self._BASE_URL).get('__Host-sp-sess'):
            self.raise_login_required()