From 97ddfefeb4faba6e61cd80996c16952b8eab16f3 Mon Sep 17 00:00:00 2001 From: doe1080 <98906116+doe1080@users.noreply.github.com> Date: Sun, 8 Jun 2025 09:04:32 +0900 Subject: [PATCH] [ie/nobelprize] Fix extractor (#13205) Authored by: doe1080 --- yt_dlp/extractor/nobelprize.py | 80 +++++++++++++++++----------------- 1 file changed, 39 insertions(+), 41 deletions(-) diff --git a/yt_dlp/extractor/nobelprize.py b/yt_dlp/extractor/nobelprize.py index 536ca27f75..833bab0944 100644 --- a/yt_dlp/extractor/nobelprize.py +++ b/yt_dlp/extractor/nobelprize.py @@ -1,59 +1,57 @@ from .common import InfoExtractor from ..utils import ( - determine_ext, - get_element_by_attribute, + UnsupportedError, + clean_html, int_or_none, - js_to_json, - mimetype2ext, - update_url_query, + parse_duration, + parse_qs, + str_or_none, + update_url, ) +from ..utils.traversal import find_element, traverse_obj class NobelPrizeIE(InfoExtractor): - _WORKING = False - _VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer.*?\bid=(?P\d+)' - _TEST = { - 'url': 'http://www.nobelprize.org/mediaplayer/?id=2636', - 'md5': '04c81e5714bb36cc4e2232fee1d8157f', + _VALID_URL = r'https?://(?:(?:mediaplayer|www)\.)?nobelprize\.org/mediaplayer/' + _TESTS = [{ + 'url': 'https://www.nobelprize.org/mediaplayer/?id=2636', 'info_dict': { 'id': '2636', 'ext': 'mp4', 'title': 'Announcement of the 2016 Nobel Prize in Physics', - 'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739', + 'description': 'md5:1a2d8a6ca80c88fb3b9a326e0b0e8e43', + 'duration': 1560.0, + 'thumbnail': r're:https?://www\.nobelprize\.org/images/.+\.jpg', + 'timestamp': 1504883793, + 'upload_date': '20170908', }, - } + }, { + 'url': 'https://mediaplayer.nobelprize.org/mediaplayer/?qid=12693', + 'info_dict': { + 'id': '12693', + 'ext': 'mp4', + 'title': 'Nobel Lecture by Peter Higgs', + 'description': 'md5:9b12e275dbe3a8138484e70e00673a05', + 'duration': 1800.0, + 'thumbnail': r're:https?://www\.nobelprize\.org/images/.+\.jpg', + 'timestamp': 1504883793, + 'upload_date': '20170908', + }, + }] def _real_extract(self, url): - video_id = self._match_id(url) - webpage = self._download_webpage(url, video_id) - media = self._parse_json(self._search_regex( - r'(?s)var\s*config\s*=\s*({.+?});', webpage, - 'config'), video_id, js_to_json)['media'] - title = media['title'] - - formats = [] - for source in media.get('source', []): - source_src = source.get('src') - if not source_src: - continue - ext = mimetype2ext(source.get('type')) or determine_ext(source_src) - if ext == 'm3u8': - formats.extend(self._extract_m3u8_formats( - source_src, video_id, 'mp4', 'm3u8_native', - m3u8_id='hls', fatal=False)) - elif ext == 'f4m': - formats.extend(self._extract_f4m_formats( - update_url_query(source_src, {'hdcore': '3.7.0'}), - video_id, f4m_id='hds', fatal=False)) - else: - formats.append({ - 'url': source_src, - }) + video_id = traverse_obj(parse_qs(url), ( + ('id', 'qid'), -1, {int_or_none}, {str_or_none}, any)) + if not video_id: + raise UnsupportedError(url) + webpage = self._download_webpage( + update_url(url, netloc='mediaplayer.nobelprize.org'), video_id) return { + **self._search_json_ld(webpage, video_id), 'id': video_id, - 'title': title, - 'description': get_element_by_attribute('itemprop', 'description', webpage), - 'duration': int_or_none(media.get('duration')), - 'formats': formats, + 'title': self._html_search_meta('caption', webpage), + 'description': traverse_obj(webpage, ( + {find_element(tag='span', attr='itemprop', value='description')}, {clean_html})), + 'duration': parse_duration(self._html_search_meta('duration', webpage)), }