diff --git a/bears/general/DuplicateFileBear.py b/bears/general/DuplicateFileBear.py
new file mode 100644
--- /dev/null
+++ b/bears/general/DuplicateFileBear.py
@@ -0,0 +1,45 @@
+import itertools
+
+from coalib.bears.GlobalBear import GlobalBear
+from coalib.results.Result import Result
+from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY
+
+
+class DuplicateFileBear(GlobalBear):
+    LANGUAGES = {'All'}
+    AUTHORS = {'The coala developers'}
+    AUTHORS_EMAILS = {'coala-devel@googlegroups.com'}
+    LICENSE = 'AGPL-3.0'
+    CAN_DETECT = {'Duplication'}
+
+    def run(self):
+        """
+        Checks for duplicate files.
+
+        Compares the contents of every unordered pair of files in
+        ``self.file_dict`` and yields one result for each pair whose
+        contents are equal. When fewer than two files are given there is
+        nothing to compare, so a single major result saying so is yielded
+        instead.
+        """
+        if not self.file_dict:
+            yield Result(self, 'You did not add any file to compare',
+                         severity=RESULT_SEVERITY.MAJOR)
+            return
+
+        if len(self.file_dict) == 1:
+            yield Result(self, 'You included only one file',
+                         severity=RESULT_SEVERITY.MAJOR)
+            return
+
+        # combinations() yields each unordered pair of keys exactly once
+        # and does so lazily, so no intermediate list of pairs is needed.
+        for first_file_name, second_file_name in itertools.combinations(
+                self.file_dict, 2):
+            if (self.file_dict[first_file_name] ==
+                    self.file_dict[second_file_name]):
+                message = ('File ' + first_file_name + ' is identical'
+                           ' to File ' + second_file_name)
+                yield Result.from_values(origin=self, message=message,
+                                         severity=RESULT_SEVERITY.INFO,
+                                         file=first_file_name)
diff --git a/tests/general/DuplicatefileBearTest.py b/tests/general/DuplicatefileBearTest.py
new file mode 100644
--- /dev/null
+++ b/tests/general/DuplicatefileBearTest.py
@@ -0,0 +1,72 @@
+import unittest
+import os
+
+from coalib.settings.Section import Section
+from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY
+from bears.general.DuplicateFileBear import DuplicateFileBear
+from queue import Queue
+
+
+def get_absolute_test_path(file):
+    """
+    Returns the path of ``file`` inside the ``duplicate_test_files``
+    directory that lives next to this test module.
+    """
+    return os.path.join(os.path.dirname(__file__),
+                        'duplicate_test_files', file)
+
+
+class DuplicateFileBearTest(unittest.TestCase):
+
+    def setUp(self):
+        self.maxDiff = None
+        self.section = Section('name')
+        self.queue = Queue()
+        self.file_dict = {}
+        self.test_files = ['complexFirst.txt', 'complexSecond.txt',
+                           'noMatch.txt', 'smallFirst.txt',
+                           'smallSecond.txt']
+
+    def get_results(self, files_to_check):
+        """
+        Loads the given test files into ``self.file_dict``, runs the bear
+        on them and returns the results it yields as a list.
+        """
+        self.files = [get_absolute_test_path(file) for file in files_to_check]
+        for filename in self.files:
+            with open(filename, 'r', encoding='utf-8') as _file:
+                self.file_dict[filename] = tuple(_file.readlines())
+        self.uut = DuplicateFileBear(self.file_dict, self.section,
+                                     self.queue)
+        return list(self.uut.run())
+
+    def test_results_complete(self):
+        results = self.get_results(self.test_files)
+        combined = '\t'.join(result.message for result in results)
+        # Look for the paths as substrings rather than splitting the
+        # messages on whitespace: the absolute path of the checkout may
+        # itself contain spaces.
+        for name in ('complexFirst.txt', 'complexSecond.txt',
+                     'smallFirst.txt', 'smallSecond.txt'):
+            self.assertIn(get_absolute_test_path(name), combined)
+        self.assertNotIn(get_absolute_test_path('noMatch.txt'), combined)
+        self.assertEqual(len(results), 2)
+        self.assertEqual(results[0].severity, RESULT_SEVERITY.INFO)
+
+    def test_results_no_duplicates(self):
+        results = self.get_results([self.test_files[2],
+                                    self.test_files[3]])
+        messages = [result.message for result in results]
+        self.assertEqual(messages, [])
+
+    def test_results_empty(self):
+        results = self.get_results([])
+        messages = [result.message for result in results]
+        self.assertEqual(messages, ['You did not add any file to compare'])
+        self.assertEqual(results[0].severity, RESULT_SEVERITY.MAJOR)
+
+    def test_result_single(self):
+        results = self.get_results([self.test_files[0]])
+        messages = [result.message for result in results]
+        self.assertEqual(messages, ['You included only one file'])
+        self.assertEqual(results[0].severity, RESULT_SEVERITY.MAJOR)
diff --git a/tests/general/duplicate_test_files/complexFirst.txt b/tests/general/duplicate_test_files/complexFirst.txt
new file mode 100644
index 0000000000..b1374a7f24
--- /dev/null
+++ b/tests/general/duplicate_test_files/complexFirst.txt
@@ -0,0 +1 @@
+4 r34l ch4ll3n63: 123 ÄÖü ABc @€¥ §&% {[( ←↓→↑ ĦŊħ ß°^ \\\n\u2192
diff --git a/tests/general/duplicate_test_files/complexSecond.txt b/tests/general/duplicate_test_files/complexSecond.txt
new file mode 100644
index 0000000000..b1374a7f24
--- /dev/null
+++ b/tests/general/duplicate_test_files/complexSecond.txt
@@ -0,0 +1 @@
+4 r34l ch4ll3n63: 123 ÄÖü ABc @€¥ §&% {[( ←↓→↑ ĦŊħ ß°^ \\\n\u2192
diff --git a/tests/general/duplicate_test_files/noMatch.txt b/tests/general/duplicate_test_files/noMatch.txt
new file mode 100644
index 0000000000..beb0c3b970
--- /dev/null
+++ b/tests/general/duplicate_test_files/noMatch.txt
@@ -0,0 +1 @@
+I dont match with any other file!
diff --git a/tests/general/duplicate_test_files/smallFirst.txt b/tests/general/duplicate_test_files/smallFirst.txt
new file mode 100644
index 0000000000..58d758dee7
--- /dev/null
+++ b/tests/general/duplicate_test_files/smallFirst.txt
@@ -0,0 +1 @@
+This is a small file!
diff --git a/tests/general/duplicate_test_files/smallSecond.txt b/tests/general/duplicate_test_files/smallSecond.txt
new file mode 100644
index 0000000000..58d758dee7
--- /dev/null
+++ b/tests/general/duplicate_test_files/smallSecond.txt
@@ -0,0 +1 @@
+This is a small file!