diff --git a/requirements.txt b/requirements.txt index 3415e3843..b2eeadd16 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ checklist==0.0.11 spacy==3.0.0 numpy + # for back_translation torch diff --git a/transformations/negated_antonym_perturbation/README.md b/transformations/negated_antonym_perturbation/README.md new file mode 100644 index 000000000..bddcafde3 --- /dev/null +++ b/transformations/negated_antonym_perturbation/README.md @@ -0,0 +1,27 @@ +# Negated Antonym Perturbation +This perturbation rephrases adjectives and adverbs as their negated antonyms. + +Authors: Mayukh Das (Technical University of Braunschweig / mayukh.das@tu-bs.de) + +## What type of a transformation is this? +This transformation takes all types of adjectives and adverbs and converts them to their negated antonyms. +Therefore, the transformation retains the semantics of the original text, as positives get converted to negated negatives and negatives get converted to negated positives. +Example: I think you are prepared for the test --> I think you are not unprepared for the test +## What tasks does it intend to benefit? +This can act as a valid perturbation that retains the semantics. +It is intended to help evaluate the robustness of text classification models, e.g. for sentiment analysis. + + +## What are the limitations of this transformation? 
+It is limited to adjectives and adverbs only (all types, e.g., comparative and superlative). + +## Robustness Evaluation + +original accuracy: 96.0 +dataset_name: 'imdb' +model_name: 'aychang/roberta-base-imdb' +no_of_examples: 250 +accuracy after perturbation: 91.0 + +The accuracy drops by 5 percentage points when tested on the IMDB dataset with RoBERTa. + diff --git a/transformations/negated_antonym_perturbation/__init__.py b/transformations/negated_antonym_perturbation/__init__.py new file mode 100644 index 000000000..930cdce0b --- /dev/null +++ b/transformations/negated_antonym_perturbation/__init__.py @@ -0,0 +1 @@ +from .transformation import * diff --git a/transformations/negated_antonym_perturbation/test.json b/transformations/negated_antonym_perturbation/test.json new file mode 100644 index 000000000..e3a73a372 --- /dev/null +++ b/transformations/negated_antonym_perturbation/test.json @@ -0,0 +1,68 @@ +{ + "type": "negated_antonym_transformation", + "test_cases": [ + { + "class": "NegatedAntonym", + "inputs": { + "sentence": "I think you are successful." + }, + "outputs": [{ + "sentence": "I think you are not unsuccessful." + }] + }, + { + "class": "NegatedAntonym", + "inputs": { + "sentence": "I think you are prepared for the test." + }, + "outputs": [{ + "sentence": "I think you are not unprepared for the test." + }] + }, + { + "class": "NegatedAntonym", + "inputs": { + "sentence": "He ran quickly." + }, + "outputs": [{ + "sentence": "He ran not slowly ." + }] + }, + { + "class": "NegatedAntonym", + "inputs": { + "sentence": "He plays the flute beautifully." + }, + "outputs": [{ + "sentence": "He plays the flute not unattractively." + }] + }, + { + "class": "NegatedAntonym", + "inputs": { + "sentence": "I think you are joyful." + }, + "outputs": [{ + "sentence": "I think you are not sorrowful." 
+ }] + }, + { + "class": "NegatedAntonym", + "inputs": { + "sentence": "During Harry's fourth year of school, Harry is unwillingly entered as a participant in the Triwizard Tournament, a dangerous yet exciting contest where three champions, one from each participating school, must compete with each other in three tasks in order to win the Triwizard Cup." + }, + "outputs": [{ + "sentence": "During Harry's fourth year of school, Harry is not willingly entered as a participant in the Triwizard Tournament, a not safe yet exciting contest where three champions, one from each participating school, must compete with each not same in three tasks in order to win the Triwizard Cup." + }] + }, + { + "class": "NegatedAntonym", + "inputs": { + "sentence": "Umbridge's incessant and persistent efforts to land him in trouble and the defensive lessons, Harry begins to lose sleep as he constantly receives disturbing dreams about a dark corridor in the Ministry of Magic, followed by a burning desire to learn more." + }, + "outputs": [{ + "sentence": "Umbridge's incessant and not caducous efforts to land him in trouble and the not offensive lessons, Harry begins to lose sleep as he constantly receives disturbing dreams about a not light corridor in the Ministry of Magic, followed by a burning desire to learn not less." 
import nltk
from nltk.corpus import wordnet
import nltk.tokenize as nt

from interfaces.SentenceOperation import SentenceOperation
from tasks.TaskTypes import TaskType


class NegatedAntonym(SentenceOperation):
    """Rewrite adjectives and adverbs as their negated antonym.

    Every token tagged as an adjective or adverb for which WordNet lists
    an antonym is replaced by ``"not <antonym>"`` (for example
    ``prepared`` becomes ``not unprepared``). Tokens without a known
    antonym are left untouched.
    """

    tasks = [TaskType.TEXT_CLASSIFICATION, TaskType.TEXT_TO_TEXT_GENERATION, TaskType.SENTIMENT_ANALYSIS]
    languages = ["en"]
    keywords = ["sentiment-analysis", "tokenizer-required", "highly-meaning-preserving"]

    # Part-of-speech tags (as returned by ``nltk.pos_tag``) that are
    # eligible for rewriting: base, comparative and superlative forms of
    # adjectives (JJ*) and adverbs (RB*).
    _TARGET_TAGS = frozenset({"JJ", "JJR", "JJS", "RB", "RBR", "RBS"})

    def __init__(self, seed=0):
        super().__init__(seed)

    def generate(self, sentence):
        """Return a one-element list holding the perturbed sentence."""
        return [self.Neg_Antonym(sentence)]

    def Neg_Antonym(self, sentence):
        """Replace adjectives/adverbs in ``sentence`` by ``not <antonym>``.

        Args:
            sentence: The input text.

        Returns:
            The tokens of the rewritten sentence joined by single spaces,
            with periods, commas and apostrophe-led tokens re-attached
            to the word before them.
        """
        tokens = nt.word_tokenize(sentence)
        # Tagging runs on the untouched tokens; replacements are written
        # back by index, so one rewrite never affects another lookup.
        for index, (_, tag) in enumerate(nltk.pos_tag(tokens)):
            if tag not in self._TARGET_TAGS:
                continue
            antonym = self._first_antonym(tokens[index])
            if antonym is not None:
                tokens[index] = "not " + antonym
        return self._join_tokens(tokens)

    @staticmethod
    def _first_antonym(word):
        """Return the first WordNet antonym of ``word``, or ``None``.

        Synsets and lemmas are scanned in the order WordNet returns them
        and the search stops at the first lemma that has an antonym.
        """
        for synset in wordnet.synsets(word):
            for lemma in synset.lemmas():
                opposites = lemma.antonyms()
                if opposites:
                    # Multi-word lemma names use "_" where the surface
                    # form has a space.
                    return opposites[0].name().replace("_", " ")
        return None

    @staticmethod
    def _join_tokens(tokens):
        """Join ``tokens`` with spaces, gluing trailing punctuation.

        A ".", "," or any token starting with an apostrophe is appended
        to the piece before it. Such a token at the very start of the
        sentence has nothing to attach to and is kept as its own piece
        instead of wrapping around to the last token, and runs of
        consecutive punctuation are neither dropped nor duplicated.
        """
        pieces = []
        for token in tokens:
            attaches = token in (".", ",") or token.startswith("'")
            if attaches and pieces:
                pieces[-1] += token
            else:
                pieces.append(token)
        return " ".join(pieces)