pub_methods.py

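"""Shared helper functions for the crawler: filter persistence under temp/,
onion page fetching, MD5 hashing, and bookkeeping of thread, sub-thread and
link-queue files under the threads/, threads_sub/ and queue/ directories."""
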
import os
import json
import hashlib
from time import sleep  # needed by load_filters() to wait on the working flag

import requests

import reporting

def update_filters(filters):
    # dump the filter list to temp/filters.dat, guarded by a working flag
    try:
        # create working flag
        if os.path.exists('temp/working.flag'):
            os.remove('temp/working.flag')
        with open('temp/working.flag', 'w+') as working_flag:
            working_flag.write('x')
        # open output file for writing
        with open('temp/filters.dat', 'w+') as filehandle:
            json.dump(filters, filehandle)
        if os.path.exists('temp/working.flag'):
            os.remove('temp/working.flag')
        print("\r \r", end='')
        print("\rfilters updated...\r", end='')
    except Exception as e:
        reporting.report('error', 'critical', 'pub_methods module > update_filters', str(e))
        return None

def load_filters():
    try:
        # wait while the base engine is updating filters from the database
        while os.path.exists('temp/working.flag'):
            sleep(0.5)
        # load filters from file
        if os.path.exists('temp/filters.dat'):
            with open('temp/filters.dat', 'r') as f:
                _filters = json.load(f)
            return _filters
        else:
            reporting.report('error', 'critical', 'pub_methods module > load_filters (X01)',
                             'temp/filters.dat not found')
            return None
    except Exception as e:
        reporting.report('error', 'critical', 'pub_methods module > load_filters (X02)', str(e))
        return None

def get_onion_content(url, session):
    # fetch page content through the supplied requests session
    try:
        _content = session.get(url, verify=False).text
        print("Content URL : " + url)
        print("Content length : " + str(len(_content)))
        print("------------------------------------------------------------------------")
        return _content
    except Exception as e:
        reporting.report('error', 'critical', 'crawler > get_onion_content', str(e))
        return None

def generate_hash(_input):
    # MD5 hex digest of the given string (or bytes)
    m = hashlib.md5()
    if isinstance(_input, str):
        _input = _input.encode('utf-8')
    m.update(_input)
    return str(m.hexdigest())

def thread_count():
    _dirs = os.listdir('threads')
    return len(_dirs)


def thread_remove(identifier):
    if os.path.exists('threads/' + identifier):
        os.unlink('threads/' + identifier)


def thread_clear():
    folder = 'threads'
    for the_file in os.listdir(folder):
        file_path = os.path.join(folder, the_file)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
            # elif os.path.isdir(file_path): shutil.rmtree(file_path)
        except Exception as e:
            print(e)

def thread_identifier(_h='x'):
    # reserve the next free thread_<h>_<n>.session file and return its name
    _index = 1
    _base = 'thread_' + _h + '_'
    for i in range(1, 500):
        _file = _base + str(_index) + '.session'
        if os.path.exists('threads/' + _file):
            _index += 1
            continue
        else:
            with open('threads/' + _file, 'w+'):
                pass
            return _file
    return None  # no free slot found

def subthread_count():
    _dirs = os.listdir('threads_sub')
    return len(_dirs)


def subthread_remove(identifier):
    if os.path.exists('threads_sub/' + identifier):
        os.unlink('threads_sub/' + identifier)


def subthread_clear():
    folder = 'threads_sub'
    for the_file in os.listdir(folder):
        file_path = os.path.join(folder, the_file)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
            # elif os.path.isdir(file_path): shutil.rmtree(file_path)
        except Exception as e:
            print(e)

def subthread_identifier(_parent='x', _url=''):
    # reserve the next free <md5(parent + url)>_<n>.session file and return its name
    _index = 1
    _base = 'thread_' + _parent + '_' + _url
    m = hashlib.md5()
    m.update(_base.encode('utf-8'))
    _base = str(m.hexdigest()) + '_'
    for i in range(1, 500):
        _file = _base + str(_index) + '.session'
        if os.path.exists('threads_sub/' + _file):
            _index += 1
            continue
        else:
            with open('threads_sub/' + _file, 'w+'):
                pass
            return _file
    return None  # no free slot found

def subcrawl_links_export(_list, _identifier):
    with open('queue/' + _identifier, 'w') as f:
        f.write(json.dumps(_list))


def subcrawl_links_import(_identifier):
    with open('queue/' + _identifier, 'r') as f:
        _list = json.loads(f.read())
    return _list


def subcrawl_links_remove(_identifier):
    if os.path.exists('queue/' + _identifier):
        os.unlink('queue/' + _identifier)


def subcrawl_links_clear():
    folder = 'queue'
    for the_file in os.listdir(folder):
        file_path = os.path.join(folder, the_file)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
            # elif os.path.isdir(file_path): shutil.rmtree(file_path)
        except Exception as e:
            print(e)
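

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module). It assumes the
# temp/, threads/, threads_sub/ and queue/ directories already exist and
# that the caller supplies a requests session; the real crawler presumably
# routes that session through Tor for .onion URLs. The dummy filter dict
# and URLs below are placeholders, not values taken from the project.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    # persist a dummy filter payload, then read it back
    update_filters({'keywords': ['example']})
    print(load_filters())

    # reserve a thread slot, queue a link list under it, then clean up
    _ident = thread_identifier('demo')
    subcrawl_links_export(['http://example.onion/'], _ident)
    print(subcrawl_links_import(_ident))
    subcrawl_links_remove(_ident)
    thread_remove(_ident)

    # fetch a page through a plain session and hash its content
    _session = requests.Session()
    _content = get_onion_content('https://example.com/', _session)
    if _content is not None:
        print(generate_hash(_content))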