-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathconverter_boatos_dataset.py
64 lines (55 loc) · 2.02 KB
/
converter_boatos_dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import sys

import numpy
import pandas as pd
from carmenta import count_words
from carmenta.controllers.EvaluateObjects import EvaluateObjects
from carmenta.controllers.SemanticAnalyser import SemanticAnalyser
from jano import extractor, Config
from jano.controllers.ArticleExtractor import ArticleExtractor
from jano.controllers.SearchController import SearchController
from jano.models.ArticleObject import ArticleObject
def extract_data(row) -> dict:
    """Gather the article behind ``row`` and the search results for its title.

    ``row['link']`` is handed to ``ArticleExtractor().extract``. The resulting
    title, with every ``#boato`` occurrence removed, is searched through a
    ``SearchController("none")`` and the search response is passed to
    ``extractor``.

    Args:
        row: Mapping-like record that provides the ``link``, ``timestamp``
            and ``hoax`` entries (e.g. a pandas row).

    Returns:
        A dict with two keys: ``"original"``, an ``ArticleObject`` built from
        the cleaned title and the row's fields, and ``"meta"``, the value
        returned by ``extractor``.
    """
    link = row['link']
    article = ArticleExtractor().extract(link)
    searcher = SearchController("none")

    # The cleaned title is both the search query and the stored article title.
    title = str(article.titulo).replace("#boato", "")
    related = extractor(searcher.search(title))

    original = ArticleObject(title, link, "none", row['timestamp'], "none", "none", row['hoax'])
    return {"original": original, "meta": related}
def score(row) -> dict:
    """Compute the comparator and semantic score groups for one dataset row.

    Args:
        row: Record forwarded unchanged to ``extract_data``.

    Returns:
        A dict with two keys: ``'comparators'``, the result of
        ``EvaluateObjects.evaluate`` on the extracted data, and
        ``'semantic'``, the merged mappings returned by
        ``SemanticAnalyser.gramatica`` and ``SemanticAnalyser.polaridade`` for
        the original article's ``texto`` plus a ``'length'`` entry holding
        ``count_words`` of that same text.
    """
    SemanticAnalyser.check_packages()

    extracted = extract_data(row)
    comparators = EvaluateObjects.evaluate(extracted)

    article = extracted['original']
    grammar = SemanticAnalyser.gramatica(article.texto)
    polarity = SemanticAnalyser.polaridade(article.texto)

    # On a key clash the polarity entry overrides the grammar entry.
    semantic = {**grammar, **polarity}
    semantic['length'] = count_words(article.texto)

    return {'comparators': comparators, 'semantic': semantic}
def translate_to_keras(row) -> list:
    """Flatten the scores of one row into a plain list of values.

    Every name in ``Config.values()['headers']`` except ``'result'`` starts
    at ``0``. The comparator scores and then the semantic scores returned by
    ``score(row)`` are written on top, overriding matching headers and
    appending any key that is not a header.

    Args:
        row: Record forwarded unchanged to ``score``.

    Returns:
        The collected values in insertion order: header order first, then any
        extra score keys in the order they were added.
    """
    features = dict.fromkeys(Config.values()['headers'], 0)
    features.pop('result', None)

    scores = score(row)
    features.update(scores['comparators'])
    features.update(scores['semantic'])

    return list(features.values())
# Script body: turn each row of boatos.csv into a feature list and append its
# string form as one line of test.csv.
df = pd.read_csv('boatos.csv', header=0)
for index, row in df.iterrows():
    try:
        print(index)  # progress indicator on stdout
        # A constant 0.0 is appended as the final column.
        # NOTE(review): presumably the label for the dropped 'result' header -- confirm.
        result = translate_to_keras(row) + [0.0]
        # Re-opened in append mode for every row, so each finished row is
        # written out before the next one starts.
        with open("test.csv", "a") as myfile:
            myfile.write(str(result) + "\n")
    except Exception as exc:
        # Best-effort conversion: a failing row must not stop the run, but it
        # is reported on stderr rather than being dropped silently.
        print("row {} skipped: {!r}".format(index, exc), file=sys.stderr)