-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsentimentAnalysis.py
51 lines (46 loc) · 2.8 KB
/
sentimentAnalysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import pandas as pd
from typing import List
from transformers import pipeline, AutoModelForSequenceClassification
import os
from tqdm import tqdm
tqdm.pandas()
def label_accuracy(true_labels: pd.Series, predicted_labels: pd.Series) -> float:
    """Return the fraction of rows whose two labels match, ignoring case.

    Both series are expected to share the same index (e.g. both derived from
    the same DataFrame); the comparison is done element-wise.
    """
    matches = true_labels.str.lower() == predicted_labels.str.lower()
    return matches.mean()


def score_obfuscated_file(csv_path: str, classify) -> float:
    """Classify every obfuscated text in one CSV file and return the accuracy.

    The file must provide an 'obfuscatedText' column (the classifier input)
    and a 'sentiment' column (the reference label).

    ``classify`` is called with a single string and must return a sequence
    whose first element is a mapping with a 'label' key.
    """
    df = pd.read_csv(csv_path, sep=',', header=0)
    # Cast to str so that missing/numeric cells are still valid classifier input.
    texts = df['obfuscatedText'].astype(str)
    # progress_apply is provided by the tqdm.pandas() registration at the top of the file.
    predicted = texts.progress_apply(lambda text: classify(text)[0]['label'])
    # Score directly against the frame that was just loaded: nothing is carried
    # over from previously processed files, so files of different lengths are
    # each evaluated on exactly their own rows.
    return label_accuracy(df['sentiment'], predicted)


def accuracy_pivot(records: List[dict]) -> pd.DataFrame:
    """Build a mechanism x epsilon table of accuracies.

    Each record is a dict with the keys 'mechanism', 'epsilon' and 'accuracy'.
    """
    results_df = pd.DataFrame(records, columns=['mechanism', 'epsilon', 'accuracy'])
    return results_df.pivot(index='mechanism', columns='epsilon', values='accuracy')


def main() -> None:
    """Evaluate sentiment accuracy for every mechanism/epsilon pair and save the table."""
    mechanisms: List[str] = ['CMP', 'Mahalanobis', 'VickreyCMP', 'VickreyMhl', 'SanText', 'CusText', 'TEM']
    epsilons: List[float] = [1, 5, 10, 12.5, 15, 17.5, 20, 50]

    # initialize the sentiment analysis pipeline
    MODEL = 'cardiffnlp/twitter-roberta-base-sentiment-latest'
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)
    sentiment_analysis = pipeline('sentiment-analysis', model=model, tokenizer=MODEL)

    results = []
    for mech in mechanisms:
        print(f'Processing mechanism: {mech}')
        for e in epsilons:
            print(f'Processing epsilon: {e}')
            accuracy = score_obfuscated_file(
                f'./results/sentimentAnalysis/{mech}/obfuscatedText_{mech}_{e}.csv',
                sentiment_analysis,
            )
            results.append({'mechanism': mech, 'epsilon': e, 'accuracy': accuracy})

    # create a pivot table with the results
    pivotAcc = accuracy_pivot(results)

    # save the pivot table to a csv file
    # makedirs(exist_ok=True) creates missing parents and does not fail if the
    # directory already exists.
    os.makedirs('./results/sentimentAnalysis/evaluation', exist_ok=True)
    pivotAcc.to_csv('./results/sentimentAnalysis/evaluation/sentimentAnalysisAccuracy.csv')


if __name__ == '__main__':
    main()
# To also measure the accuracy on the original (non-obfuscated) texts,
# run the same steps on the 'text' column instead of 'obfuscatedText'. Any of the result files can be used as the source, as long as it provides both the 'text' column and the 'sentiment' column.
#df = pd.read_csv('./results/sentimentAnalysis/CMP/obfuscatedText_CMP_1.csv', sep=',', header=0)
#df['text'] = df['text'].astype(str)
#df['trueLabel'] = df['sentiment']
#df['predictedLabel'] = df['text'].progress_apply(lambda x: sentiment_analysis(x)[0]['label'])
#accuracy = (df['trueLabel'].str.lower() == df['predictedLabel'].str.lower()).mean()
#print(f'Accuracy on original text: {accuracy}')