#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Created on Thu Mar 28 19:02:40 2019
Association analysis model (privacy-computing analysis and forensics).
Association analysis methods used: rule-based association analysis,
statistics-based association analysis, and data-mining-based association analysis.
"""
import json
import copy
import graphviz
import pydot
import random
import time
import os
import sys
import re
import uuid
import sqlite3
import hashlib
import send2trash
import argparse
import shutil
import community
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import math
import pdfkit
import pandas as pd
from igraph import *
from collections import OrderedDict
import networkx as nx
from multiprocessing import Pool
from functools import partial
from parser_table import parser_table
# Generate a large volume of simulated logs
def genHugeLog(patternJsonFilePath, logFileDirPath, numOfLog):
"""
    Simulated log format:
{
"os" : "",
"timestamp" : 0,
"pid" : 0,
"ppid" : 0,
"network" : {
"local_ip" : "",
"foreign_ip" : ""
}
}
"""
osTypeList = ["windows", "centos", "ubuntu", "redhat"]
ipList = ["111.111.111.111", "222.222.222.222", "233.233.233.233",
"234.234.234.234", "235.235.235.235", "236.236.236.236"]
with open(patternJsonFilePath, "r") as patternJsonFile:
pattern = json.load(patternJsonFile, object_pairs_hook=OrderedDict)
# print(type(pattern))
# print(pattern)
logDictList = list()
for index in range(numOfLog):
logDict = copy.deepcopy(pattern)
# print(pattern)
logDict["os"] = osTypeList[random.randint(0, len(osTypeList) - 1)]
logDict["timestamp"] = random.randint(0, 5 * numOfLog)
logDict["ppid"] = random.randint(5000, 5000 + numOfLog / 20)
logDict["pid"] = random.randint(logDict["ppid"], 7000 + numOfLog / 20)
# while logDict["ppid"] >= logDict["pid"]:
# logDict["ppid"] = random.randint(5000, 5000 + numOfLog / 25)
# print(ipList)
if (index % 50 == 0):
tmpIpList = copy.deepcopy(ipList)
# print(tmpIpList)
logDict["network"]["local_ip"] = tmpIpList[random.randint(0, (len(tmpIpList) - 1))]
# print(logDict["network"]["local_ip"])
tmpIpList.remove(logDict["network"]["local_ip"])
logDict["network"]["foreign_ip"] = tmpIpList[random.randint(0, (len(tmpIpList) - 1))]
logDictList.append(logDict)
logFilePath = os.path.join(logFileDirPath, str(uuid.uuid4()) + ".json")
with open(logFilePath, "w") as logFile:
json.dump(logDictList, logFile, sort_keys=False, indent=4, separators=(",", " : "))
def isMerge(logDictA, logDictB, associationDict):
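    # Merge criterion: two log entries are merged iff every key they share carries
    # an identical value; stricter earlier variants are kept commented out below.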
    # Exactly identical
# logStrA = json.dumps(logDictA)
# logStrB = json.dumps(logDictB)
# if logStrA == logStrB:
# return True
    # # Top-level fields fully identical and extra fields not conflicting
# topKeyList = associationDict["topKeyList"]
# for topKey in topKeyList:
    # # Every log must contain the top-level fields; missing ones fall back to defaults, otherwise add a check here
# # if topKey in logDictA.keys() and topKey in logDictB.keys():
# jsonStrA = json.dumps(logDictA[topKey])
# jsonStrB = json.dumps(logDictB[topKey])
# if jsonStrA != jsonStrB:
# return False
# extraKeyList = associationDict["extraKeyList"]
# for extraKey in extraKeyList:
# if extraKey in logDictA.keys() and extraKey in logDictB.keys():
# jsonStrA = json.dumps(logDictA[topKey])
# jsonStrB = json.dumps(logDictB[topKey])
# if jsonStrA != jsonStrB:
# return False
keySet = set(logDictA.keys()).union(set(logDictB.keys()))
for key in keySet:
if key in logDictA.keys() and key in logDictB.keys():
jsonStrA = json.dumps(logDictA[key])
jsonStrB = json.dumps(logDictB[key])
if jsonStrA != jsonStrB:
return False
return True
# Define the association rules (rule-based association analysis)
def isConnect(logDictA, logDictB, associationDict):
"""
    Define the association rules: mutually exclusive fields immediately rule out a
    connection; equality fields are then checked, recursing into nested dictionaries.
    More complex rules can be added later so that two nodes are connected only when
    the association between the two log entries is strong enough.
"""
    # Mutually exclusive fields (top-level fields that every log must populate)
    mutexKeyList = associationDict["mutexKeyList"]
    # Direct-equality fields (leaf-level keys)
    equalKeyList = associationDict["equalKeyList"]
    # Cross-equality fields
    crossEqualTupleList = associationDict["crossEqualTupleList"]
    # Default-value dictionary
    defaultValueDict = associationDict["defaultValueDict"]
    # TODO: add more complex association rules here
    # Handle mutually exclusive fields first
for key in mutexKeyList:
if key in logDictA.keys() and key in logDictB.keys():
if logDictA[key] != logDictB[key]:
return False
for key in logDictA.keys():
        # Handle nested dictionaries
if isinstance(logDictA[key], dict) and key in logDictB.keys():
if(isConnect(logDictA[key], logDictB[key], associationDict)):
return True
elif key in equalKeyList:
            # Handle direct-equality fields
if key in logDictA.keys() and key in logDictB.keys():
if logDictA[key] == logDictB[key] and logDictA[key] != defaultValueDict[key]:
return True
else:
            # Handle cross-equality fields
for tupleItem in crossEqualTupleList:
if key in tupleItem:
for item in tupleItem:
if item in logDictB.keys() and item != key:
if logDictA[key] == logDictB[item] and logDictA[key] != defaultValueDict[key]:
return True
return False
# Build the log graph
def genLogGraphFromLogDir(logFileDirPath, associationDict):
    print("Log graph initialization")
defaultColor = {"color": {"r": 30, "g": 144, "b": 255, "a": 0}}
graphGexfFilePath = "logGraph.gexf"
graphFigFilePath = "logGraph.jpg"
graph = nx.Graph()
for root, dirs, files in os.walk(logFileDirPath):
for file in files:
if file.endswith(".json"):
logFilePath = os.path.join(root, file)
with open(logFilePath, "r") as logFile:
logDictList = json.load(logFile)
for logDict in logDictList:
logStr = json.dumps(logDict)
logLabel = str(uuid.uuid4())
graph.add_node(logLabel)
graph.nodes[logLabel]["viz"] = defaultColor
graph.nodes[logLabel]["logStr"] = logStr
nodeList = copy.deepcopy(list(graph.nodes))
for i, nodeA in enumerate(nodeList):
if nodeA not in list(graph.nodes):
continue
for nodeB in nodeList[(i + 1):]:
if nodeB not in list(graph.nodes):
continue
# print("A: %s, B: %s" % (graph.nodes[nodeA], graph.nodes[nodeB]))
logDictA = json.loads(graph.nodes[nodeA]["logStr"])
logDictB = json.loads(graph.nodes[nodeB]["logStr"])
if(isMerge(logDictA, logDictB, associationDict)):
keySet = set(logDictA.keys()).union(set(logDictB.keys()))
print("A: %s, B: %s" % (graph.nodes[nodeA], graph.nodes[nodeB]))
                # Merge node B's attributes into node A
for key in keySet:
if key not in logDictA.keys():
logDictA[key] = logDictB[key]
                # Update node A's attributes
graph.nodes[nodeA]["logStr"] = json.dumps(logDictA)
                # Reconnect node B's edges to node A, avoiding self-loops
for nodeAdj in list(graph.adj[nodeB]):
if nodeAdj != nodeA:
graph.add_edge(nodeAdj, nodeA)
                # Remove node B from the graph
graph.remove_node(nodeB)
elif(isConnect(logDictA, logDictB, associationDict)):
graph.add_edge(nodeA, nodeB)
nx.write_gexf(graph, graphGexfFilePath)
#print("Generate log graph done!")
print("原始日志图包含节点数(日志条目数): %s" % graph.number_of_nodes())
print("原始日志图包含边数: %s" % graph.number_of_edges())
print("原始日志图静态图: ")
print('<img src="./logGraph.jpg" alt="原始日志图静态图" />')
network_draw(graph, graphFigFilePath)
print('<a href="./graphView/index.html" target="_blank">点击查看原始日志动态图</a> 包含了全部日志信息。')
return graph
# Connectivity-based filtering of the log graph
def connectedFilter(graph, limit=10):
"""
    Separate completely unrelated events and filter out events that contain only one
    or very few log entries (more complex filtering conditions can be configured).
"""
print("连通性分析信息")
# "wccsg" means "weakly connected component subgraph"
wccsgList = sorted(nx.connected_component_subgraphs(graph),
key=len, reverse=True)
wccsgLenList = [len(subgraph) for subgraph in wccsgList]
print("根据连通性划分得到不相关的安全事件数: %s" % len(wccsgLenList))
print("每个安全事件中包含的日志条目数列表如下: ")
print(wccsgLenList)
# 生成日志图事件概述(基于统计的关联分析)
print("事件中日志条目统计结果: ")
autoLimit = describe(wccsgLenList)
print("事件中日志条目统计图表: ")
print('<img src="./statistics.jpg" alt="事件中日志条目统计" />')
histAndBoxPlot(wccsgLenList, "事件中包含的日志条目", "statistics.jpg")
print("系统统计计算推荐的事件大小过滤值: %d" % int(autoLimit))
print("选项--limit指定的事件大小过滤值: %d" % int(limit))
limit = max(limit, autoLimit)
print("最终确定的过滤值为(包含日志条目小于此值的事件会被过滤掉): %d" % limit)
# 过滤掉包含日志较少的事件(可以定义更复杂的筛选)
remainList = [graph for graph in wccsgList if len(graph) > limit]
removeList = [graph for graph in wccsgList if len(graph) <= limit]
connectedFilterGraph = copy.deepcopy(graph)
for g in removeList:
connectedFilterGraph.remove_nodes_from(g.nodes)
    # Color each remaining event subgraph differently
newColor = {"color": {"r": 30, "g": 144, "b": 255, "a": 0}}
usedColor = [str(newColor)]
for g in remainList:
while str(newColor) in usedColor:
newColor = randomColor()
usedColor.append(str(newColor))
for node in g.nodes:
connectedFilterGraph.nodes[node]["viz"] = newColor
nx.write_gexf(connectedFilterGraph, "connectedFilterGraph.gexf")
print("初步过滤信息 ")
print("日志图包含节点数(日志条目数): %s" % connectedFilterGraph.number_of_nodes())
print("日志图包含边数: %s" % connectedFilterGraph.number_of_edges())
print("日志规模下降为原来的: %.2f%%!" % (connectedFilterGraph.number_of_nodes() / graph.number_of_nodes() * 100))
print("连通性过滤后的日志静态图: ")
print('<img src="./connectedFilterGraph.jpg" alt="连通性过滤后的日志静态图" />')
network_draw(connectedFilterGraph, "connectedFilterGraph.jpg")
print("注: 不同的事件以不同的颜色标明")
print('<a href="./graphView/index.html" target="_blank">点击查看过滤后的日志动态图</a> 包含了全部事件信息。')
return connectedFilterGraph, remainList
# Community-detection filtering of the log graph (data-mining-based association analysis)
def communityFilter(connectedFilterGraph, subgraphList):
"""
    Run community clustering on the larger events to partition them further and
    reduce event size.
"""
print("社区检测信息")
communityFilterGraph = copy.deepcopy(connectedFilterGraph)
comsgLists, comsgLenLists= list(), list()
# 对较大的事件进行社区检测,并进行渐变色着色,社区越大颜色越明亮
for i, g in enumerate(subgraphList):
comsgList, comsgLenList = communityDetect(g)
comsgLists.append(comsgList)
comsgLenLists.append(comsgLenList)
curColor = copy.deepcopy(communityFilterGraph.nodes[list(g.nodes)[0]]["viz"])
for j, comsg in enumerate(list(reversed(comsgList))):
if len(comsgList) == 1:
continue
newColor = brighterColor(curColor, len(comsgList) - j)
for node in comsg.nodes:
communityFilterGraph.nodes[node]["viz"] = copy.deepcopy(newColor)
gexfFilePath = "./event/event" + str(i) + "_"+ str(len(comsgList) - j - 1) + ".gexf"
nx.write_gexf(communityFilterGraph.subgraph(comsg.nodes), gexfFilePath)
gexfFilePath = "./event/event" + str(i) + ".gexf"
nx.write_gexf(communityFilterGraph.subgraph(g.nodes), gexfFilePath)
nx.write_gexf(communityFilterGraph, "communityFilterGraph.gexf")
    # TODO: communities that are too small could also be filtered out
    print("Number of log entries per security event (nested by community):")
    print(comsgLenLists)
    print("Static view of an event after community detection (a single event is shown as an example): ")
    print('<img src="./communityFilterGraph.jpg" alt="Event log graph after community detection" />')
    network_draw(communityFilterGraph.subgraph(subgraphList[0]), "communityFilterGraph.jpg")
    print("Note: a single event is split into several communities; communities containing more log entries are brighter")
    print('<a href="./graphView/index.html" target="_blank">Click to view the interactive community-detection graph</a> It contains all community information.')
return communityFilterGraph, comsgLists
def communityDetect(graph):
# "comsg" means "community Subgraph"
comsgList = list()
# partitionDict : {nodeId : communityId}
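    # community.best_partition() comes from the python-louvain package and runs the
    # Louvain modularity-maximization algorithm.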
    partitionDict = community.best_partition(graph)
numOfCom = len(set(partitionDict.values()))
for comId in range(numOfCom):
comNodesList = [node for node in partitionDict.keys()
if partitionDict[node] == comId]
comsg = graph.subgraph(comNodesList).copy()
comsgList.append(comsg)
comsgList = sorted(comsgList, key=len, reverse=True)
comsgLenList = [len(subgraph) for subgraph in comsgList]
return comsgList, comsgLenList
# Backtrace analysis (source --> destination, tree-style tracing)
def backtrace(item, eventGraph):
"""
    item is a 2-tuple (sourceKey, destKey).
    Pass in the graph of the event to analyze and the pair of keys to trace back,
    e.g. ("ppid", "pid").
"""
DG = nx.DiGraph()
for node in eventGraph.nodes:
logDict = json.loads(eventGraph.nodes[node]["logStr"])
src = logDict[item[0]]
dest = logDict[item[1]]
if src == dest:
continue
if (src in DG.nodes and dest in DG.nodes):
continue
if dest in DG.nodes:
if DG.in_degree(dest) != 0:
continue
DG.add_edge(src, dest)
# wccsgList = sorted(nx.weakly_connected_component_subgraphs(DG), key=len, reverse=True)
# plt.figure()
# pos = nx.nx_pydot.graphviz_layout(wccsgList[0], prog='dot')
# nx.draw(DG, pos=pos, with_labels=True)
# plt.savefig("tree.jpg")
dot = nx.nx_pydot.to_pydot(DG)
print('<img src="./backtrace.jpg" alt="回溯分析静态图" />')
dot.write_jpeg("backtrace.jpg")
print("注:图中标明了回溯对象的值")
print('<a href="./graphView/index.html" target="_blank">点击查看回溯分析动态图</a> 包含了全部信息。')
nx.write_gexf(DG, "backtrace.gexf")
# Sequential event analysis (e.g. time series: sort the entries; connections could be broken when the gap between events is too large)
def seriesAnalysis(seriesKey, eventGraph):
"""
    seriesKey is the key of the sequential data; for now the event's log entries
    are simply sorted by it and written out as a table.
"""
tupleList = list()
for node in eventGraph.nodes:
logStr = eventGraph.nodes[node]["logStr"]
logDict = json.loads(logStr)
seriesData = logDict[seriesKey]
tupleList.append((seriesData, logStr))
tupleList.sort()
logStrList = [logStr for (seriesData, logStr) in tupleList]
seriesDataList = [seriesData for (seriesData, logStr) in tupleList]
    dataDict = {seriesKey: seriesDataList,
                "log entry": logStrList}
df = pd.DataFrame(dataDict)
df.to_csv("seriesAnalysis.csv", encoding="utf_8")
old_width = pd.get_option("display.max_colwidth")
pd.set_option("display.max_colwidth", -1)
# df.to_html("seriesAnalysis.html",escape=False,index=False,sparsify=True,border=0,index_names=False,header=False)
df.to_html("seriesAnalysis.html")
pd.set_option("display.max_colwidth", old_width)
print("insert html seriesAnalysis.html")
# print(df)
# Bot/zombie IP analysis (rule-based association analysis)
def botIpAttack(eventGraph):
pass
# Generate the association analysis report
def report():
sys.stdout.flush()
parserTxtToHtml()
parserHtmlToPdf()
# Utility functions
def cleanDir(dirPath, preservationFileList=[], toTrash=True):
for fileName in os.listdir(dirPath):
if fileName not in preservationFileList:
filePath = os.path.join(dirPath, fileName)
if toTrash:
send2trash.send2trash(filePath)
else:
if os.path.isfile(filePath):
os.unlink(filePath)
if os.path.isdir(filePath):
shutil.rmtree(filePath)
def randomColor():
color = {"color": {"r": 30, "g": 144, "b": 255, "a": 0}}
color["color"]["r"] = random.randint(0, 155)
color["color"]["g"] = random.randint(0, 155)
color["color"]["b"] = random.randint(0, 155)
return color
def brighterColor(color, increment):
colorList = list(color["color"].values())
max_color , max_index = 0, 0
for i, c in enumerate(colorList):
if (c > max_color):
max_index = i
max_color = c
max_color = max_color + int((255 - max_color) / increment)
color["color"][list(color["color"].keys())[max_index]] = max_color
return color
def convertToIgraph():
pass
def histAndBoxPlot(dataList, dataLabel, figFilePath):
plt.rcParams["font.sans-serif"]=["SimHei"]
data = np.array(dataList)
fig = plt.figure(figsize =(9,5))
# boxplot
axBoxplot = fig.add_subplot(1,2,1)
axBoxplot.set_ylabel(dataLabel)
axBoxplot.yaxis.set_major_locator(ticker.MultipleLocator(int(max(data) / 15)))
    axBoxplot.set_title("Box plot")
axBoxplot.boxplot(data,sym='o',whis=1.5, showmeans=True)
# hist
axhist = fig.add_subplot(1,2,2)
axhist.set_xlabel(dataLabel)
axhist.xaxis.set_major_locator(ticker.MultipleLocator(int(max(data) / 10)))
    axhist.set_ylabel("Frequency")
    axhist.set_title("Histogram")
axhist.hist(data,bins=40, density=0, facecolor="blue", edgecolor="black", alpha=0.7)
fig.tight_layout()
#figurePdfFilePath = dataLabel + ".jpg"
plt.savefig(figFilePath)
# plt.show()
#
def describe(dataList,labels=""):
    pd.set_option('display.precision', 0)
pd.set_option('display.unicode.ambiguous_as_wide', True)
pd.set_option('display.unicode.east_asian_width', True)
# dataLists, labels = list(), list()
# dataLists.append(dataList)
# labels.append(label)
# dataFrame = pd.DataFrame(dataLists)
# dataFrame = dataFrame.T
# statisticsDataFrame = dataFrame.describe()
# statisticsDataFrame.columns = labels
s = pd.Series(dataList)
# ss means statisticsSeries
ss = s.describe()
# statisticsDataFrame.round(2)
index = ["参与统计: ","平均值: ","标准差: ","最小值: ","25%: ","50%: ","75%: ","最大值: "]
statisticsDataFrame= pd.Series(ss.values, index=index)
print(statisticsDataFrame)
if ss["min"] != ss["max"]:
limit = (ss["min"] + ss["max"] / ss["mean"] * ss["min"] / ss["75%"])
else:
limit = 0
limit = min(ss["mean"], limit)
return limit
def parserTxtToHtml():
    htmlHead = ('<!DOCTYPE html><html><head><title>Association Analysis Report</title><meta charset="utf-8" /></head><body>'
                "<h1>Association Analysis Report</h1>")
htmlTail = ("</body></html>")
with open("report.html", "w", encoding="utf-8") as htmlFile:
# htmlFile.seek(0)
# htmlFile.truncate()
htmlFile.write(htmlHead)
txtFile = open("report.txt", "r", encoding="utf-8")
lines = txtFile.readlines()
for line in lines:
if "dtype" in line:
continue
if ":" in line or "[" in line or "<" in line:
newLine = "<p>" + line + "</p>"
elif "insert" not in line:
newLine = "<h2>" + line + "</h2>"
else:
with open(line.strip().split(" ")[-1], "r") as insertFile:
newLine = "".join(insertFile.readlines())
newLine = "<p>" + newLine + "</p>"
htmlFile.write(newLine)
htmlFile.write(htmlTail)
def parserHtmlToPdf():
# confg = pdfkit.configuration(wkhtmltopdf="D:\\wkhtmltox\\bin\\wkhtmltopdf.exe")
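    # Note: pdfkit requires the wkhtmltopdf binary to be installed and reachable,
    # either on PATH or via pdfkit.configuration(wkhtmltopdf=...) as sketched above.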
options = {
"enable-local-file-access": None,
}
pdfkit.from_file("report.html", 'report.pdf', options=options)
class Logger(object):
def __init__(self, filename="report.txt"):
self.terminal = sys.stdout
reStdout = open(filename, "a", encoding="utf-8")
reStdout.seek(0)
reStdout.truncate()
self.log = reStdout
def write(self, message):
self.terminal.write(message)
self.log.write(message)
def flush(self):
self.log.flush()
def close(self):
self.log.close()
def network_draw(graph, saveFigPath):
plt.figure()
node_color = list()
for node in graph.nodes:
color = [30 / 255, 144 / 255, 255 / 255]
color[0] = graph.nodes[node]["viz"]["color"]["r"] / 255
color[1] = graph.nodes[node]["viz"]["color"]["g"] / 255
color[2] = graph.nodes[node]["viz"]["color"]["b"] / 255
node_color.append(color)
pos = nx.kamada_kawai_layout(graph)
nx.draw(graph, pos=pos, node_color=node_color, node_size = 60, edge_color="gray", with_labels=False)
plt.savefig(saveFigPath)
def notBool(b):
return bool(1-b)
# Main entry point
def main(pattern, log_dir, config, num_log, limit):
    # Clean up the working directories; the ./event directory stores per-event analysis results
# ignoreFileList = ["main.py","funcTest.py", "graphView", "test.json", pattern, "log"]
ignoreFileList = [".gitignore"]
cleanDir("./event", ignoreFileList, toTrash=False)
try:
os.unlink("report.txt")
except FileNotFoundError:
pass
# os.mkdir("event")
    # Redirect stdout so that everything printed also goes into report.txt
    sys.stdout = Logger()
    # Clean up the simulated logs from the previous run and generate new ones
cleanDir(log_dir, ignoreFileList, toTrash=False)
genHugeLog(pattern, log_dir, num_log)
    # Parse the association rule configuration file
    associationDict = parser_table(config)
    # Build the log graph
    graph = genLogGraphFromLogDir(log_dir, associationDict)
    # Connectivity analysis
    connectedGraph, subgraphList = connectedFilter(graph, limit)
    # Community detection
communityFilterGraph, comsgLists = communityFilter(connectedGraph, subgraphList)
    # Backtrace analysis example (process tree)
    print("Backtrace analysis example")
    print("Backtrace analysis using the parent/child process pair (ppid, pid) as an example: ")
if os.path.exists("./event/event0_0.gexf"):
eventGraph = nx.read_gexf("./event/event0_0.gexf")
else:
eventGraph = nx.read_gexf("./event/event0.gexf")
backtrace(("ppid", "pid"), eventGraph)
    # Sequential analysis example (time series)
    print("Sequential data analysis example")
    print("Sequential analysis using the timestamp field as an example:")
# eventGraph = nx.read_gexf("./event/event0_0.gexf")
seriesAnalysis("timestamp", eventGraph)
    # Generate the HTML and PDF reports
report()
if __name__ == "__main__":
argparser = argparse.ArgumentParser()
argparser.add_argument("--pattern", type=str, default="pattern.json")
argparser.add_argument("--log_dir", type=str, default="./log")
argparser.add_argument("--config", type=str, default="association_table.md")
argparser.add_argument("--num_log", type=int, default=2000)
argparser.add_argument("--limit", type=int, default=10)
args = argparser.parse_args()
# print(vars(args))
main(**vars(args))
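
# Example invocation (a sketch; it assumes pattern.json, association_table.md and the
# ./log directory exist in the working directory, matching the argparse defaults above):
#   python main.py --pattern pattern.json --log_dir ./log \
#       --config association_table.md --num_log 2000 --limit 10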