diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index bc89b2955e..e6c8d574e0 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1947,6 +1947,137 @@ jwplayer("mediaplayer").setup({"abouttext":"Visit Indie DB","aboutlink":"http:\/
with self.assertWarns(DeprecationWarning):
self.assertEqual(self.ie._search_nextjs_data('', None, default='{}'), {})
+ def test_search_nuxt_json(self):
+ HTML_TMPL = ''
+ VALID_DATA = '''
+ ["ShallowReactive",1],
+ {"data":2,"state":21,"once":25,"_errors":28,"_server_errors":30},
+ ["ShallowReactive",3],
+ {"$abcdef123456":4},
+ {"podcast":5,"activeEpisodeData":7},
+ {"podcast":6,"seasons":14},
+ {"title":10,"id":11},
+ ["Reactive",8],
+ {"episode":9,"creators":18,"empty_list":20},
+ {"title":12,"id":13,"refs":34,"empty_refs":35},
+ "Series Title",
+ "podcast-id-01",
+ "Episode Title",
+ "episode-id-99",
+ [15,16,17],
+ 1,
+ 2,
+ 3,
+ [19],
+ "Podcast Creator",
+ [],
+ {"$ssite-config":22},
+ {"env":23,"name":24,"map":26,"numbers":14},
+ "production",
+ "podcast-website",
+ ["Set"],
+ ["Reactive",27],
+ ["Map"],
+ ["ShallowReactive",29],
+ {},
+ ["NuxtError",31],
+ {"status":32,"message":33},
+ 503,
+ "Service Unavailable",
+ [36,37],
+ [38,39],
+ ["Ref",40],
+ ["ShallowRef",41],
+ ["EmptyRef",42],
+ ["EmptyShallowRef",43],
+ "ref",
+ "shallow_ref",
+ "{\\"ref\\":1}",
+ "{\\"shallow_ref\\":2}"
+ '''
+ PAYLOAD = {
+ 'data': {
+ '$abcdef123456': {
+ 'podcast': {
+ 'podcast': {
+ 'title': 'Series Title',
+ 'id': 'podcast-id-01',
+ },
+ 'seasons': [1, 2, 3],
+ },
+ 'activeEpisodeData': {
+ 'episode': {
+ 'title': 'Episode Title',
+ 'id': 'episode-id-99',
+ 'refs': ['ref', 'shallow_ref'],
+ 'empty_refs': [{'ref': 1}, {'shallow_ref': 2}],
+ },
+ 'creators': ['Podcast Creator'],
+ 'empty_list': [],
+ },
+ },
+ },
+ 'state': {
+ '$ssite-config': {
+ 'env': 'production',
+ 'name': 'podcast-website',
+ 'map': [],
+ 'numbers': [1, 2, 3],
+ },
+ },
+ 'once': [],
+ '_errors': {},
+ '_server_errors': {
+ 'status': 503,
+ 'message': 'Service Unavailable',
+ },
+ }
+ PARTIALLY_INVALID = [(
+ '''
+ {"data":1},
+ {"invalid_raw_list":2},
+ [15,16,17]
+ ''',
+ {'data': {'invalid_raw_list': [None, None, None]}},
+ ), (
+ '''
+ {"data":1},
+ ["EmptyRef",2],
+ "not valid JSON"
+ ''',
+ {'data': None},
+ ), (
+ '''
+ {"data":1},
+ ["EmptyShallowRef",2],
+ "not valid JSON"
+ ''',
+ {'data': None},
+ )]
+ INVALID = [
+ '''
+ []
+ ''',
+ '''
+ ["unsupported",1],
+ {"data":2},
+ {}
+ ''',
+ ]
+ DEFAULT = object()
+
+ self.assertEqual(self.ie._search_nuxt_json(HTML_TMPL.format(VALID_DATA), None), PAYLOAD)
+ self.assertEqual(self.ie._search_nuxt_json('', None, fatal=False), {})
+ self.assertIs(self.ie._search_nuxt_json('', None, default=DEFAULT), DEFAULT)
+
+ for data, expected in PARTIALLY_INVALID:
+ self.assertEqual(
+ self.ie._search_nuxt_json(HTML_TMPL.format(data), None, fatal=False), expected)
+
+ for data in INVALID:
+ self.assertIs(
+ self.ie._search_nuxt_json(HTML_TMPL.format(data), None, default=DEFAULT), DEFAULT)
+
if __name__ == '__main__':
unittest.main()
diff --git a/test/test_devalue.py b/test/test_devalue.py
new file mode 100644
index 0000000000..29eb89e87f
--- /dev/null
+++ b/test/test_devalue.py
@@ -0,0 +1,235 @@
+#!/usr/bin/env python3
+
+# Allow direct execution
+import os
+import sys
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+
+import datetime as dt
+import json
+import math
+import re
+import unittest
+
+from yt_dlp.utils.jslib import devalue
+
+
+TEST_CASES_EQUALS = [{
+ 'name': 'int',
+ 'unparsed': [-42],
+ 'parsed': -42,
+}, {
+ 'name': 'str',
+ 'unparsed': ['woo!!!'],
+ 'parsed': 'woo!!!',
+}, {
+ 'name': 'Number',
+ 'unparsed': [['Object', 42]],
+ 'parsed': 42,
+}, {
+ 'name': 'String',
+ 'unparsed': [['Object', 'yar']],
+ 'parsed': 'yar',
+}, {
+ 'name': 'Infinity',
+ 'unparsed': -4,
+ 'parsed': math.inf,
+}, {
+ 'name': 'negative Infinity',
+ 'unparsed': -5,
+ 'parsed': -math.inf,
+}, {
+ 'name': 'negative zero',
+ 'unparsed': -6,
+ 'parsed': -0.0,
+}, {
+ 'name': 'RegExp',
+ 'unparsed': [['RegExp', 'regexp', 'gim']], # XXX: flags are ignored
+ 'parsed': re.compile('regexp'),
+}, {
+ 'name': 'Date',
+ 'unparsed': [['Date', '2001-09-09T01:46:40.000Z']],
+ 'parsed': dt.datetime.fromtimestamp(1e9, tz=dt.timezone.utc),
+}, {
+ 'name': 'Array',
+ 'unparsed': [[1, 2, 3], 'a', 'b', 'c'],
+ 'parsed': ['a', 'b', 'c'],
+}, {
+ 'name': 'Array (empty)',
+ 'unparsed': [[]],
+ 'parsed': [],
+}, {
+ 'name': 'Array (sparse)',
+ 'unparsed': [[-2, 1, -2], 'b'],
+ 'parsed': [None, 'b', None],
+}, {
+ 'name': 'Object',
+ 'unparsed': [{'foo': 1, 'x-y': 2}, 'bar', 'z'],
+ 'parsed': {'foo': 'bar', 'x-y': 'z'},
+}, {
+ 'name': 'Set',
+ 'unparsed': [['Set', 1, 2, 3], 1, 2, 3],
+ 'parsed': [1, 2, 3],
+}, {
+ 'name': 'Map',
+ 'unparsed': [['Map', 1, 2], 'a', 'b'],
+ 'parsed': [['a', 'b']],
+}, {
+ 'name': 'BigInt',
+ 'unparsed': [['BigInt', '1']],
+ 'parsed': 1,
+}, {
+ 'name': 'Uint8Array',
+ 'unparsed': [['Uint8Array', 'AQID']],
+ 'parsed': [1, 2, 3],
+}, {
+ 'name': 'ArrayBuffer',
+ 'unparsed': [['ArrayBuffer', 'AQID']],
+ 'parsed': [1, 2, 3],
+}, {
+ 'name': 'str (repetition)',
+ 'unparsed': [[1, 1], 'a string'],
+ 'parsed': ['a string', 'a string'],
+}, {
+ 'name': 'None (repetition)',
+ 'unparsed': [[1, 1], None],
+ 'parsed': [None, None],
+}, {
+ 'name': 'dict (repetition)',
+ 'unparsed': [[1, 1], {}],
+ 'parsed': [{}, {}],
+}, {
+ 'name': 'Object without prototype',
+ 'unparsed': [['null']],
+ 'parsed': {},
+}, {
+ 'name': 'cross-realm POJO',
+ 'unparsed': [{}],
+ 'parsed': {},
+}]
+
+TEST_CASES_IS = [{
+ 'name': 'bool',
+ 'unparsed': [True],
+ 'parsed': True,
+}, {
+ 'name': 'Boolean',
+ 'unparsed': [['Object', False]],
+ 'parsed': False,
+}, {
+ 'name': 'undefined',
+ 'unparsed': -1,
+ 'parsed': None,
+}, {
+ 'name': 'null',
+ 'unparsed': [None],
+ 'parsed': None,
+}, {
+ 'name': 'NaN',
+ 'unparsed': -3,
+ 'parsed': math.nan,
+}]
+
+TEST_CASES_INVALID = [{
+ 'name': 'empty string',
+ 'unparsed': '',
+ 'error': ValueError,
+ 'pattern': r'expected int or list as input',
+}, {
+ 'name': 'hole',
+ 'unparsed': -2,
+ 'error': ValueError,
+ 'pattern': r'invalid integer input',
+}, {
+ 'name': 'string',
+ 'unparsed': 'hello',
+ 'error': ValueError,
+ 'pattern': r'expected int or list as input',
+}, {
+ 'name': 'number',
+ 'unparsed': 42,
+ 'error': ValueError,
+ 'pattern': r'invalid integer input',
+}, {
+ 'name': 'boolean',
+ 'unparsed': True,
+ 'error': ValueError,
+ 'pattern': r'expected int or list as input',
+}, {
+ 'name': 'null',
+ 'unparsed': None,
+ 'error': ValueError,
+ 'pattern': r'expected int or list as input',
+}, {
+ 'name': 'object',
+ 'unparsed': {},
+ 'error': ValueError,
+ 'pattern': r'expected int or list as input',
+}, {
+ 'name': 'empty array',
+ 'unparsed': [],
+ 'error': ValueError,
+ 'pattern': r'expected a non-empty list as input',
+}, {
+ 'name': 'Python negative indexing',
+ 'unparsed': [[1, 2, 3, 4, 5, 6, 7, -7], 1, 2, 3, 4, 5, 6, 7],
+ 'error': IndexError,
+ 'pattern': r'invalid index: -7',
+}]
+
+
+class TestDevalue(unittest.TestCase):
+ def test_devalue_parse_equals(self):
+ for tc in TEST_CASES_EQUALS:
+ self.assertEqual(devalue.parse(tc['unparsed']), tc['parsed'], tc['name'])
+
+ def test_devalue_parse_is(self):
+ for tc in TEST_CASES_IS:
+ self.assertIs(devalue.parse(tc['unparsed']), tc['parsed'], tc['name'])
+
+ def test_devalue_parse_invalid(self):
+ for tc in TEST_CASES_INVALID:
+ with self.assertRaisesRegex(tc['error'], tc['pattern'], msg=tc['name']):
+ devalue.parse(tc['unparsed'])
+
+ def test_devalue_parse_cyclical(self):
+ name = 'Map (cyclical)'
+ result = devalue.parse([['Map', 1, 0], 'self'])
+ self.assertEqual(result[0][0], 'self', name)
+ self.assertIs(result, result[0][1], name)
+
+ name = 'Set (cyclical)'
+ result = devalue.parse([['Set', 0, 1], 42])
+ self.assertEqual(result[1], 42, name)
+ self.assertIs(result, result[0], name)
+
+ result = devalue.parse([[0]])
+ self.assertIs(result, result[0], 'Array (cyclical)')
+
+ name = 'Object (cyclical)'
+ result = devalue.parse([{'self': 0}])
+ self.assertIs(result, result['self'], name)
+
+ name = 'Object with null prototype (cyclical)'
+ result = devalue.parse([['null', 'self', 0]])
+ self.assertIs(result, result['self'], name)
+
+ name = 'Objects (cyclical)'
+ result = devalue.parse([[1, 2], {'second': 2}, {'first': 1}])
+ self.assertIs(result[0], result[1]['first'], name)
+ self.assertIs(result[1], result[0]['second'], name)
+
+ def test_devalue_parse_revivers(self):
+ self.assertEqual(
+ devalue.parse([['indirect', 1], {'a': 2}, 'b'], revivers={'indirect': lambda x: x}),
+ {'a': 'b'}, 'revivers (indirect)')
+
+ self.assertEqual(
+ devalue.parse([['parse', 1], '{"a":0}'], revivers={'parse': lambda x: json.loads(x)}),
+ {'a': 0}, 'revivers (parse)')
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py
index 1174bd4f5e..6058f66aea 100644
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -101,6 +101,7 @@ from ..utils import (
xpath_with_ns,
)
from ..utils._utils import _request_dump_filename
+from ..utils.jslib import devalue
class InfoExtractor:
@@ -1795,6 +1796,63 @@ class InfoExtractor:
ret = self._parse_json(js, video_id, transform_source=functools.partial(js_to_json, vars=args), fatal=fatal)
return traverse_obj(ret, traverse) or {}
+ def _resolve_nuxt_array(self, array, video_id, *, fatal=True, default=NO_DEFAULT):
+ """Resolves Nuxt rich JSON payload arrays"""
+ # Ref: https://github.com/nuxt/nuxt/commit/9e503be0f2a24f4df72a3ccab2db4d3e63511f57
+ # https://github.com/nuxt/nuxt/pull/19205
+ if default is not NO_DEFAULT:
+ fatal = False
+
+ if not isinstance(array, list) or not array:
+ error_msg = 'Unable to resolve Nuxt JSON data: invalid input'
+ if fatal:
+ raise ExtractorError(error_msg, video_id=video_id)
+ elif default is NO_DEFAULT:
+ self.report_warning(error_msg, video_id=video_id)
+ return {} if default is NO_DEFAULT else default
+
+ def indirect_reviver(data):
+ return data
+
+ def json_reviver(data):
+ return json.loads(data)
+
+ gen = devalue.parse_iter(array, revivers={
+ 'NuxtError': indirect_reviver,
+ 'EmptyShallowRef': json_reviver,
+ 'EmptyRef': json_reviver,
+ 'ShallowRef': indirect_reviver,
+ 'ShallowReactive': indirect_reviver,
+ 'Ref': indirect_reviver,
+ 'Reactive': indirect_reviver,
+ })
+
+ while True:
+ try:
+ error_msg = f'Error resolving Nuxt JSON: {gen.send(None)}'
+ if fatal:
+ raise ExtractorError(error_msg, video_id=video_id)
+ elif default is NO_DEFAULT:
+ self.report_warning(error_msg, video_id=video_id, only_once=True)
+ else:
+ self.write_debug(f'{video_id}: {error_msg}', only_once=True)
+ except StopIteration as error:
+ return error.value or ({} if default is NO_DEFAULT else default)
+
+ def _search_nuxt_json(self, webpage, video_id, *, fatal=True, default=NO_DEFAULT):
+ """Parses metadata from Nuxt rich JSON payloads embedded in HTML"""
+ passed_default = default is not NO_DEFAULT
+
+ array = self._search_json(
+ r'