[ie] Detect invalid m3u8 playlist data (#13563)

Authored by: Grub4K
This commit is contained in:
Simon Sawicki 2025-06-29 18:49:27 +02:00 committed by GitHub
parent 7e2504f941
commit 7b81634fb1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 75 additions and 10 deletions

View File

@ -36,6 +36,18 @@ class InfoExtractorTestRequestHandler(http.server.BaseHTTPRequestHandler):
self.send_header('Content-Type', 'text/html; charset=utf-8') self.send_header('Content-Type', 'text/html; charset=utf-8')
self.end_headers() self.end_headers()
self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) self.wfile.write(TEAPOT_RESPONSE_BODY.encode())
elif self.path == '/fake.m3u8':
self.send_response(200)
self.send_header('Content-Length', '1024')
self.end_headers()
self.wfile.write(1024 * b'\x00')
elif self.path == '/bipbop.m3u8':
with open('test/testdata/m3u8/bipbop_16x9.m3u8', 'rb') as f:
data = f.read()
self.send_response(200)
self.send_header('Content-Length', str(len(data)))
self.end_headers()
self.wfile.write(data)
else: else:
assert False assert False
@ -2079,5 +2091,45 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT) self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT)
class TestInfoExtractorNetwork(unittest.TestCase):
def setUp(self, /):
self.httpd = http.server.HTTPServer(
('127.0.0.1', 0), InfoExtractorTestRequestHandler)
self.port = http_server_port(self.httpd)
self.server_thread = threading.Thread(target=self.httpd.serve_forever)
self.server_thread.daemon = True
self.server_thread.start()
self.called = False
def require_warning(*args, **kwargs):
self.called = True
self.ydl = FakeYDL()
self.ydl.report_warning = require_warning
self.ie = DummyIE(self.ydl)
def tearDown(self, /):
self.ydl.close()
self.httpd.shutdown()
self.httpd.server_close()
self.server_thread.join(1)
def test_extract_m3u8_formats(self):
formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles(
f'http://127.0.0.1:{self.port}/bipbop.m3u8', None, fatal=False)
self.assertFalse(self.called)
self.assertTrue(formats)
self.assertTrue(subtitles)
def test_extract_m3u8_formats_warning(self):
formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles(
f'http://127.0.0.1:{self.port}/fake.m3u8', None, fatal=False)
self.assertTrue(self.called, 'Warning was not issued for binary m3u8 file')
self.assertFalse(formats)
self.assertFalse(subtitles)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View File

@ -1,5 +1,6 @@
import base64 import base64
import collections import collections
import contextlib
import functools import functools
import getpass import getpass
import http.client import http.client
@ -2129,21 +2130,33 @@ class InfoExtractor:
raise ExtractorError(errnote, video_id=video_id) raise ExtractorError(errnote, video_id=video_id)
self.report_warning(f'{errnote}{bug_reports_message()}') self.report_warning(f'{errnote}{bug_reports_message()}')
return [], {} return [], {}
if note is None:
res = self._download_webpage_handle( note = 'Downloading m3u8 information'
m3u8_url, video_id, if errnote is None:
note='Downloading m3u8 information' if note is None else note, errnote = 'Failed to download m3u8 information'
errnote='Failed to download m3u8 information' if errnote is None else errnote, response = self._request_webpage(
m3u8_url, video_id, note=note, errnote=errnote,
fatal=fatal, data=data, headers=headers, query=query) fatal=fatal, data=data, headers=headers, query=query)
if response is False:
if res is False:
return [], {} return [], {}
m3u8_doc, urlh = res with contextlib.closing(response):
m3u8_url = urlh.url prefix = response.read(512)
if not prefix.startswith(b'#EXTM3U'):
msg = 'Response data has no m3u header'
if fatal:
raise ExtractorError(msg, video_id=video_id)
self.report_warning(f'{msg}{bug_reports_message()}', video_id=video_id)
return [], {}
content = self._webpage_read_content(
response, m3u8_url, video_id, note=note, errnote=errnote,
fatal=fatal, prefix=prefix, data=data)
if content is False:
return [], {}
return self._parse_m3u8_formats_and_subtitles( return self._parse_m3u8_formats_and_subtitles(
m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol, content, response.url, ext=ext, entry_protocol=entry_protocol,
preference=preference, quality=quality, m3u8_id=m3u8_id, preference=preference, quality=quality, m3u8_id=m3u8_id,
note=note, errnote=errnote, fatal=fatal, live=live, data=data, note=note, errnote=errnote, fatal=fatal, live=live, data=data,
headers=headers, query=query, video_id=video_id) headers=headers, query=query, video_id=video_id)