Skip to content

Commit

Permalink
DuplicateFileBear: Add DuplicateFileBear
Browse files Browse the repository at this point in the history
This bear checks for files with the same content.

Closes #1479
  • Loading branch information
Siddhartha1234 committed Mar 24, 2017
1 parent 735dda7 commit 63d31e1
Show file tree
Hide file tree
Showing 7 changed files with 108 additions and 0 deletions.
38 changes: 38 additions & 0 deletions bears/general/DuplicateFileBear.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import itertools

from coalib.bears.GlobalBear import GlobalBear
from coalib.results.Result import Result
from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY


class DuplicateFileBear(GlobalBear):
    """
    Global bear that reports pairs of files whose contents are identical.
    """
    LANGUAGES = {'All'}
    AUTHORS = {'The coala developers'}
    # NOTE(review): this address looks redacted by whatever produced this
    # copy of the file -- confirm and restore the real value.
    AUTHORS_EMAILS = {'[email protected]'}
    LICENSE = 'AGPL-3.0'
    CAN_DETECT = {'Duplication'}

    def run(self):
        """
        Checks for Duplicate Files

        Yields a MAJOR result when ``self.file_dict`` is empty or holds
        only one file, since there is nothing to compare. Otherwise yields
        one INFO result for every unordered pair of files whose entries in
        ``self.file_dict`` compare equal; each result is attached to the
        first file of the pair.
        """
        if not self.file_dict:
            yield Result(self, 'You did not add any file to compare',
                         severity=RESULT_SEVERITY.MAJOR)
        elif len(self.file_dict) == 1:
            yield Result(self, 'You included only one file',
                         severity=RESULT_SEVERITY.MAJOR)
        else:
            # Walk the pairs lazily; there is no need to materialise every
            # combination in a list before comparing.
            for first_file_name, second_file_name in itertools.combinations(
                    self.file_dict, 2):
                if (self.file_dict[first_file_name] !=
                        self.file_dict[second_file_name]):
                    continue

                message = 'File {} is identical to File {}'.format(
                    first_file_name, second_file_name)
                yield Result.from_values(origin=self, message=message,
                                         severity=RESULT_SEVERITY.INFO,
                                         file=first_file_name)
65 changes: 65 additions & 0 deletions tests/general/DuplicatefileBearTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import unittest
import os

from coalib.settings.Section import Section
from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY
from bears.general.DuplicateFileBear import DuplicateFileBear
from queue import Queue


def get_absolute_test_path(file):
    """
    Build the path of a fixture inside the ``duplicate_test_files``
    directory that sits next to this module.

    :param file: Name of the fixture file.
    :return:     The directory of this module joined with
                 ``duplicate_test_files`` and ``file``.
    """
    module_dir = os.path.dirname(__file__)
    fixture_dir = os.path.join(module_dir, 'duplicate_test_files')
    return os.path.join(fixture_dir, file)


class DuplicateFileBearTest(unittest.TestCase):
    """
    Runs DuplicateFileBear over the fixture files and checks its results.
    """

    def setUp(self):
        self.section = Section('name')
        self.queue = Queue()
        self.file_dict = {}
        self.test_files = ['complexFirst.txt', 'complexSecond.txt',
                           'noMatch.txt', 'smallFirst.txt',
                           'smallSecond.txt']

    def get_results(self, files_to_check):
        """
        Load the given fixtures into ``self.file_dict`` and run the bear.

        :param files_to_check: Fixture file names to load.
        :return:               List of the results yielded by the bear.
        """
        self.files = [get_absolute_test_path(file) for file in files_to_check]
        for filename in self.files:
            with open(filename, 'r', encoding='utf-8') as _file:
                self.file_dict[filename] = tuple(_file.readlines())
        self.maxDiff = None
        self.uut = DuplicateFileBear(self.file_dict, self.section,
                                     self.queue)
        return list(self.uut.run())

    def test_results_complete(self):
        results = self.get_results(self.test_files)
        messages = [result.message for result in results]
        # Split once; every duplicated file's path must appear as a word in
        # at least one message.
        words = '\t'.join(messages).split()
        for name in ('complexSecond.txt', 'complexFirst.txt',
                     'smallFirst.txt', 'smallSecond.txt'):
            self.assertIn(get_absolute_test_path(name), words)
        self.assertEqual(results[0].severity, RESULT_SEVERITY.INFO)

    def test_results_no_duplicates(self):
        results = self.get_results([self.test_files[2],
                                    self.test_files[3]])
        messages = [result.message for result in results]
        self.assertEqual(messages, [])

    def test_results_empty(self):
        results = self.get_results([])
        messages = [result.message for result in results]
        self.assertEqual(messages, ['You did not add any file to compare'])
        self.assertEqual(results[0].severity, RESULT_SEVERITY.MAJOR)

    def test_result_single(self):
        results = self.get_results([self.test_files[0]])
        messages = [result.message for result in results]
        self.assertEqual(messages, ['You included only one file'])
        self.assertEqual(results[0].severity, RESULT_SEVERITY.MAJOR)
1 change: 1 addition & 0 deletions tests/general/duplicate_test_files/complexFirst.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
4 r34l ch4ll3n63: 123 ÄÖü ABc @€¥ §&% {[( ←↓→↑ ĦŊħ ß°^ \\\n\u2192
1 change: 1 addition & 0 deletions tests/general/duplicate_test_files/complexSecond.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
4 r34l ch4ll3n63: 123 ÄÖü ABc @€¥ §&% {[( ←↓→↑ ĦŊħ ß°^ \\\n\u2192
1 change: 1 addition & 0 deletions tests/general/duplicate_test_files/noMatch.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
I dont match with any other file!
1 change: 1 addition & 0 deletions tests/general/duplicate_test_files/smallFirst.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is a small file!
1 change: 1 addition & 0 deletions tests/general/duplicate_test_files/smallSecond.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This is a small file!

0 comments on commit 63d31e1

Please sign in to comment.