Mirror of https://github.com/yt-dlp/yt-dlp.git (synced 2025-06-18 15:55:30 +02:00)
[ie] Improve JSON LD thumbnails extraction (#13368)
Authored by: bashonly, doe1080
Co-authored-by: doe1080 <98906116+doe1080@users.noreply.github.com>
This commit is contained in:
parent
943083edcd
commit
85c8a405e3
@ -314,6 +314,20 @@ class TestInfoExtractor(unittest.TestCase):
|
|||||||
},
|
},
|
||||||
{},
|
{},
|
||||||
),
|
),
|
||||||
|
(
|
||||||
|
# test thumbnail_url key without URL scheme
|
||||||
|
r'''
|
||||||
|
<script type="application/ld+json">
|
||||||
|
{
|
||||||
|
"@context": "https://schema.org",
|
||||||
|
"@type": "VideoObject",
|
||||||
|
"thumbnail_url": "//www.nobelprize.org/images/12693-landscape-medium-gallery.jpg"
|
||||||
|
}</script>''',
|
||||||
|
{
|
||||||
|
'thumbnails': [{'url': 'https://www.nobelprize.org/images/12693-landscape-medium-gallery.jpg'}],
|
||||||
|
},
|
||||||
|
{},
|
||||||
|
),
|
||||||
]
|
]
|
||||||
for html, expected_dict, search_json_ld_kwargs in _TESTS:
|
for html, expected_dict, search_json_ld_kwargs in _TESTS:
|
||||||
expect_dict(
|
expect_dict(
|
||||||
|
@ -1675,9 +1675,9 @@ class InfoExtractor:
|
|||||||
'ext': mimetype2ext(e.get('encodingFormat')),
|
'ext': mimetype2ext(e.get('encodingFormat')),
|
||||||
'title': unescapeHTML(e.get('name')),
|
'title': unescapeHTML(e.get('name')),
|
||||||
'description': unescapeHTML(e.get('description')),
|
'description': unescapeHTML(e.get('description')),
|
||||||
'thumbnails': [{'url': unescapeHTML(url)}
|
'thumbnails': traverse_obj(e, (('thumbnailUrl', 'thumbnailURL', 'thumbnail_url'), (None, ...), {
|
||||||
for url in variadic(traverse_obj(e, 'thumbnailUrl', 'thumbnailURL'))
|
'url': ({str}, {unescapeHTML}, {self._proto_relative_url}, {url_or_none}),
|
||||||
if url_or_none(url)],
|
})),
|
||||||
'duration': parse_duration(e.get('duration')),
|
'duration': parse_duration(e.get('duration')),
|
||||||
'timestamp': unified_timestamp(e.get('uploadDate')),
|
'timestamp': unified_timestamp(e.get('uploadDate')),
|
||||||
# author can be an instance of 'Organization' or 'Person' types.
|
# author can be an instance of 'Organization' or 'Person' types.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user