-
Notifications
You must be signed in to change notification settings - Fork 24
/
ytdump.py
132 lines (113 loc) · 3.76 KB
/
ytdump.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
"""
A tool for investigating youtube json dictionaries.
This tries to pretty print the rather complex json dictionaries youtube uses.
You can supply the json through stdin, as a string on the commandline,
or as a filename on the commandline.
Author: Willem Hengeveld <[email protected]>
"""
import json
import sys
import os.path
def extractruns(runs):
    """
    Extract all text in a 'runs' list.

    'runs' is an iterable of dictionaries. The values found under each
    'text' key are concatenated in order and returned as one string.
    Entries whose 'text' is missing or null contribute nothing, so a
    single text-less run no longer makes the join fail on None.
    """
    return "".join(r.get('text') or "" for r in runs)
def pathendswith(path, *end):
    """
    Check whether the tail of 'path' matches the patterns given in 'end'.

    Each pattern is compared with the path element at the same position,
    counted from the end of the path:
      - a type object matches an element of exactly that type,
      - an int matches an element equal to it,
      - a string starting with '*' matches a string element ending with
        the remainder of the pattern,
      - any other string matches only an equal element.
    An int element never matches a string pattern.
    Returns False when there are more patterns than path elements.
    """
    count = len(end)
    if count > len(path):
        return False

    def matches(key, pattern):
        # Exact type comparison on purpose: a bool is not accepted as an int.
        if type(pattern) is type:
            return type(key) is pattern
        if type(pattern) is int:
            return key == pattern
        if type(key) is int:
            return False
        if pattern[:1] == '*':
            return key.endswith(pattern[1:])
        return key == pattern

    return all(matches(key, pattern) for key, pattern in zip(path[-count:], end))
def processRender(j, path):
    """
    Print the properties found directly under the dictionary 'j'.

    Scalar values (int, float, str, bool) are printed as they are.
    Dictionary values are printed only when they carry a non-empty 'runs'
    entry (its texts joined together) or a non-empty 'simpleText' entry.
    Everything else is skipped.
    The output is prefixed by one space per element in 'path', and the
    header line shows the path in reverse order.
    """
    scalar_types = (int, float, str, bool)

    # Collect everything first, so nothing is printed if extraction fails.
    rows = []
    for key, value in j.items():
        if type(value) in scalar_types:
            rows.append((key, value))
            continue
        if type(value) is not dict:
            continue
        runs = value.get('runs')
        if runs:
            rows.append((key, extractruns(runs)))
            continue
        simple = value.get("simpleText")
        if simple:
            rows.append((key, simple))

    pad = " " * len(path)
    print(pad, "==== %s" % (path[::-1],))
    for key, shown in rows:
        print(pad, "| %-20s : %s" % (key, shown))
def process(j, path=None):
    """
    Recursively process the json value passed in 'j'.

    Every dictionary reached through a key ending in 'Renderer' is printed
    in detail by processRender, indented according to the path length.
    Warnings are printed when the continuation related keys
    ('continuations', 'nextContinuationData', 'continuation') are found
    outside of their expected parent, and when a value of an unexpected
    type is encountered.

    'path' is the list of keys and list indices needed to find the current
    entry from the top. Callers normally omit it.
    """
    # Use None instead of a shared mutable default list.
    if path is None:
        path = []

    if path:
        if pathendswith(path, "*Renderer"):
            if not isinstance(j, dict):
                print("WARNING: Renderer without dict", path)
            else:
                processRender(j, path)
        elif pathendswith(path, "continuations"):
            if not pathendswith(path, "*Renderer", "continuations"):
                print("WARNING: continuations without renderer", path)
        elif pathendswith(path, "nextContinuationData"):
            if not pathendswith(path, "continuations", int, "nextContinuationData"):
                print("WARNING: nextContinuationData without continuations", path)
        elif pathendswith(path, "continuation"):
            if not pathendswith(path, "nextContinuationData", "continuation"):
                print("WARNING: continuation without nextContinuationData", path)

    # Descend into containers; 'path + [...]' builds a new list for each child.
    if isinstance(j, list):
        for i, item in enumerate(j):
            process(item, path + [i])
    elif isinstance(j, dict):
        for k, item in j.items():
            process(item, path + [k])
    elif j is None or isinstance(j, (int, float, str, bool)):
        # Plain json scalars need no further processing.
        pass
    else:
        print("WARNING: unexpected type", type(j), j)
def main():
    """
    Commandline entry point.

    Without arguments, one json document is read from stdin and processed.
    Otherwise each argument is handled in turn: an argument naming an
    existing path is opened and parsed as a json file, any other argument
    is parsed as a json string.
    Problems with one argument are reported and do not stop the
    processing of the remaining arguments.
    """
    if len(sys.argv) == 1:
        data = sys.stdin.read()
        j = json.loads(data)
        process(j)
        return

    for arg in sys.argv[1:]:
        if os.path.exists(arg):
            try:
                # Decode as UTF-8 explicitly, so the result does not depend
                # on the locale's default encoding.
                with open(arg, "r", encoding="utf-8") as fh:
                    print("==>", arg, "<==")
                    j = json.load(fh)
                    process(j)
            except Exception as e:
                # Top-level boundary: report the problem, continue with the next argument.
                print("ERROR reading %s: %s" % (arg, e))
        else:
            print("==> json commandline argument <==")
            try:
                j = json.loads(arg)
            except ValueError as e:
                # Typically a mistyped filename; report it like the file case
                # instead of aborting with a traceback.
                print("ERROR parsing commandline argument: %s" % (e,))
            else:
                process(j)
# Only run the commandline interface when executed as a script, not on import.
if __name__ == "__main__":
    main()