diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index e6c8d574e0..c9f70431f7 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -36,6 +36,18 @@ class InfoExtractorTestRequestHandler(http.server.BaseHTTPRequestHandler): self.send_header('Content-Type', 'text/html; charset=utf-8') self.end_headers() self.wfile.write(TEAPOT_RESPONSE_BODY.encode()) + elif self.path == '/fake.m3u8': + self.send_response(200) + self.send_header('Content-Length', '1024') + self.end_headers() + self.wfile.write(1024 * b'\x00') + elif self.path == '/bipbop.m3u8': + with open('test/testdata/m3u8/bipbop_16x9.m3u8', 'rb') as f: + data = f.read() + self.send_response(200) + self.send_header('Content-Length', str(len(data))) + self.end_headers() + self.wfile.write(data) else: assert False @@ -2079,5 +2091,45 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/ self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT) +class TestInfoExtractorNetwork(unittest.TestCase): + def setUp(self, /): + self.httpd = http.server.HTTPServer( + ('127.0.0.1', 0), InfoExtractorTestRequestHandler) + self.port = http_server_port(self.httpd) + + self.server_thread = threading.Thread(target=self.httpd.serve_forever) + self.server_thread.daemon = True + self.server_thread.start() + + self.called = False + + def require_warning(*args, **kwargs): + self.called = True + + self.ydl = FakeYDL() + self.ydl.report_warning = require_warning + self.ie = DummyIE(self.ydl) + + def tearDown(self, /): + self.ydl.close() + self.httpd.shutdown() + self.httpd.server_close() + self.server_thread.join(1) + + def test_extract_m3u8_formats(self): + formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles( + f'http://127.0.0.1:{self.port}/bipbop.m3u8', None, fatal=False) + self.assertFalse(self.called) + self.assertTrue(formats) + self.assertTrue(subtitles) + + def test_extract_m3u8_formats_warning(self): + formats, subtitles = self.ie._extract_m3u8_formats_and_subtitles( + f'http://127.0.0.1:{self.port}/fake.m3u8', None, fatal=False) + self.assertTrue(self.called, 'Warning was not issued for binary m3u8 file') + self.assertFalse(formats) + self.assertFalse(subtitles) + + if __name__ == '__main__': unittest.main() diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py index 32b4680b73..b75e806233 100644 --- a/yt_dlp/extractor/common.py +++ b/yt_dlp/extractor/common.py @@ -1,5 +1,6 @@ import base64 import collections +import contextlib import functools import getpass import http.client @@ -2129,21 +2130,33 @@ class InfoExtractor: raise ExtractorError(errnote, video_id=video_id) self.report_warning(f'{errnote}{bug_reports_message()}') return [], {} - - res = self._download_webpage_handle( - m3u8_url, video_id, - note='Downloading m3u8 information' if note is None else note, - errnote='Failed to download m3u8 information' if errnote is None else errnote, + if note is None: + note = 'Downloading m3u8 information' + if errnote is None: + errnote = 'Failed to download m3u8 information' + response = self._request_webpage( + m3u8_url, video_id, note=note, errnote=errnote, fatal=fatal, data=data, headers=headers, query=query) - - if res is False: + if response is False: return [], {} - m3u8_doc, urlh = res - m3u8_url = urlh.url + with contextlib.closing(response): + prefix = response.read(512) + if not prefix.startswith(b'#EXTM3U'): + msg = 'Response data has no m3u header' + if fatal: + raise ExtractorError(msg, video_id=video_id) + self.report_warning(f'{msg}{bug_reports_message()}', video_id=video_id) + return [], {} + + content = self._webpage_read_content( + response, m3u8_url, video_id, note=note, errnote=errnote, + fatal=fatal, prefix=prefix, data=data) + if content is False: + return [], {} return self._parse_m3u8_formats_and_subtitles( - m3u8_doc, m3u8_url, ext=ext, entry_protocol=entry_protocol, + content, response.url, ext=ext, entry_protocol=entry_protocol, preference=preference, quality=quality, m3u8_id=m3u8_id, note=note, errnote=errnote, fatal=fatal, live=live, data=data, headers=headers, query=query, video_id=video_id)