-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtagEx.py
130 lines (89 loc) · 2.91 KB
/
tagEx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from bs4 import BeautifulSoup as bs
import os
from sys import argv
import requests as rq
class TagContext(object):
    """Parsed HTML document plus a selected group of tags and a cursor into it.

    The document is loaded from ``htmlsource``, which is first tried as a URL
    and, if it cannot be fetched, opened as a local file.
    """

    def __init__(self, htmlsource):
        """Load and parse ``htmlsource`` (a URL or a local file path)."""
        # Where the HTML came from, exactly as given by the caller.
        self.htmlsource = htmlsource
        # BeautifulSoup document built with the 'html5lib' parser.
        self.bs = self.bsInit()
        # Index into ``subset`` of the tag that getAttrib() reads from.
        self.currentTag = 0
        # Result of the last setSubset() call; empty until one is made.
        self.subset = ''

    def bsInit(self):
        """Return the parsed document, fetching by URL or reading from disk.

        Only the fetch is guarded: a failure to download ``htmlsource`` falls
        back to opening it as a file, while parser errors propagate unchanged
        instead of being mistaken for "not a URL".

        Raises:
            OSError: if the source is neither fetchable nor a readable file.
        """
        try:
            markup = self.htmlfromUrl()
        except (rq.exceptions.RequestException, ValueError, OSError):
            # Not a usable URL (missing scheme, unreachable host, timeout...).
            # Reading inside the handler keeps the fetch error attached as
            # context if the file cannot be opened either.
            with open(self.htmlsource, 'r') as f:
                markup = f.read()
        return bs(markup, 'html5lib')

    def htmlfromUrl(self, timeout=30):
        """Fetch ``htmlsource`` over HTTP and return the response text.

        Args:
            timeout: seconds handed to ``requests.get`` so an unresponsive
                server cannot block the program forever.
        """
        return rq.get(self.htmlsource, timeout=timeout).text

    def setSubset(self, tag):
        """Select every ``tag`` element of the document as the current group."""
        self.subset = self.bs.find_all(tag)

    def getAttrib(self, attr):
        """Return attribute ``attr`` of the current tag.

        Returns the string "No Attrib Found" when there is no current tag
        (empty group or index out of range) or the tag lacks the attribute.
        """
        try:
            result = self.subset[self.currentTag].get(attr)
        except (IndexError, TypeError, AttributeError):
            result = None
        if result is None:
            # .get() reports a missing attribute as None; use the same
            # sentinel as the "no such tag" case so callers always get text.
            return "No Attrib Found"
        return result

    def setCurrentTag(self, tagNumber):
        """Move the cursor to ``tagNumber`` and reprint the tag group."""
        self.currentTag = tagNumber
        self.printself()

    def printself(self):
        """Print every tag of the group with its index, marking the current one."""
        for index, tag in enumerate(self.subset):
            if index == self.currentTag:
                print("\n << " + str(index) + " >> " + str(tag))
            else:
                print('\n' + str(index) + ' - ' + str(tag))
class UI(object):
    """Interactive text menu that drives a tag context object.

    The context must provide ``printself``, ``setSubset``, ``setCurrentTag``,
    ``getAttrib`` and a ``subset`` attribute.
    """

    # Becomes 1 on the instance once the user enters "x"/"X"; the caller's
    # loop polls it to know when to stop.
    close = 0

    def __init__(self, context):
        """Bind the menu to ``context`` and build the command table."""
        self.context = context
        # Menu number -> zero-argument action.
        self.cmd = {0: self.context.printself,
                    1: self.setContextSubset,
                    2: self.setContextCurrentTag,
                    3: self.getContextAttrib,
                    }

    def handleCmd(self):
        """Read one command from the user and run it.

        "x"/"X" requests exit; non-numeric input is ignored; a number that is
        not in the command table is reported instead of raising KeyError.
        """
        command = input("cmd: ")
        if command == "x" or command == "X":
            self.close = 1
            return
        # isdecimal() only accepts characters int() can convert, unlike
        # isnumeric(), which also accepts e.g. superscript digits.
        if not command.isdecimal():
            return
        handler = self.cmd.get(int(command))
        if handler is None:
            print("\nUnknown command")
            return
        # Clear the terminal with the command the host shell understands.
        os.system("cls" if os.name == "nt" else "clear")
        handler()

    def printOpts(self):
        """Print the menu of available commands."""
        print("\n[0] Print Selected Tags")
        print("[1] Select New Tag Group to Check")
        print("[2] Select Individual Tag")
        print("[3] Extract Tag Attribute")
        print("[X] Exit\n")

    def setContextSubset(self):
        """Ask for a tag name and make it the context's current group."""
        tag = input("\nTag Group to Check > ")
        self.context.setSubset(tag)
        if not self.context.subset:
            print("\nNo Tag Group Found")

    def setContextCurrentTag(self):
        """Ask for a tag index and make it the context's current tag."""
        try:
            tagNumber = int(input("\nNew Tag Number > "))
        except ValueError:
            # Keep the session alive on a typo instead of crashing.
            print("\nInvalid Tag Number")
            return
        self.context.setCurrentTag(tagNumber)

    def getContextAttrib(self):
        """Ask for an attribute name and print its value on the current tag."""
        attr = input("\nAttr > ")
        value = self.context.getAttrib(attr)
        if isinstance(value, (list, tuple)):
            # A list result (e.g. several class names) is shown space-separated.
            value = ' '.join(str(item) for item in value)
        # str() so a None or other non-string result cannot break the
        # concatenation.
        print('\n' + str(value) + '\n')
def MainLoop(htmlsource):
    """Run the interactive session for ``htmlsource``.

    Builds a TagContext and a UI on top of it, asks once for the initial tag
    group and prints it, then alternates between showing the menu and handling
    one command until the UI's ``close`` flag is set.
    """
    tag_context = TagContext(htmlsource)
    menu = UI(tag_context)

    # Initial selection, shown before the first menu.
    menu.setContextSubset()
    tag_context.printself()

    while True:
        if menu.close:
            break
        menu.printOpts()
        menu.handleCmd()
if __name__ == "__main__":
    # argv[0] is the script itself, so the real argument count is len(argv) - 1.
    #   1 argument  -> interactive session on that source
    #   3 arguments -> source, tag, attr: print the attribute of the first tag
    # Any other count is a usage error (the old `len(argv) < 1` check could
    # never fire, and other counts silently did nothing).
    if len(argv) == 2:
        MainLoop(argv[1])
    elif len(argv) == 4:
        context = TagContext(argv[1])
        context.setSubset(argv[2])
        print(context.getAttrib(argv[3]))
    else:
        print("Pass an html source (file or url)")
        print("Usage: " + argv[0] + " <source> [<tag> <attr>]")
        raise SystemExit(1)