-
Notifications
You must be signed in to change notification settings - Fork 581
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
DuplicateFileBear: Add DuplicateFileBear
This bear checks for files with the same content. Closes #1479
- Loading branch information
Siddhartha1234
committed
Mar 24, 2017
1 parent
735dda7
commit 63d31e1
Showing
7 changed files
with
108 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
import itertools | ||
|
||
from coalib.bears.GlobalBear import GlobalBear | ||
from coalib.results.Result import Result | ||
from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY | ||
|
||
|
||
class DuplicateFileBear(GlobalBear):
    # Bear metadata, declared as class-level constants.
    LANGUAGES = {'All'}
    AUTHORS = {'The coala developers'}
    AUTHORS_EMAILS = {'[email protected]'}
    LICENSE = 'AGPL-3.0'
    CAN_DETECT = {'Duplication'}

    def run(self):
        """
        Checks for duplicate files, i.e. files whose contents are identical.
        """
        contents_by_name = self.file_dict

        # Nothing to compare: report the problem and stop.
        if not contents_by_name:
            yield Result(self, 'You did not add any file to compare',
                         severity=RESULT_SEVERITY.MAJOR)
            return

        # A single file has no partner to be compared against.
        if len(contents_by_name) == 1:
            yield Result(self, 'You included only one file',
                         severity=RESULT_SEVERITY.MAJOR)
            return

        # Visit every unordered pair of file names exactly once; the result
        # for a matching pair is attached to the first file of that pair.
        for first_name, second_name in itertools.combinations(
                contents_by_name, 2):
            if contents_by_name[first_name] == contents_by_name[second_name]:
                message = ('File ' + first_name +
                           ' is identical to File ' + second_name)
                yield Result.from_values(origin=self,
                                         message=message,
                                         severity=RESULT_SEVERITY.INFO,
                                         file=first_name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
import unittest | ||
import os | ||
|
||
from coalib.settings.Section import Section | ||
from coalib.results.RESULT_SEVERITY import RESULT_SEVERITY | ||
from bears.general.DuplicateFileBear import DuplicateFileBear | ||
from queue import Queue | ||
|
||
|
||
def get_absolute_test_path(file):
    """
    Return the path of a fixture inside the ``duplicate_test_files``
    directory that sits next to this module.

    :param file: Name of the fixture file.
    :return:     The fixture name joined onto the fixture directory path.
    """
    fixture_dir = os.path.join(os.path.dirname(__file__),
                               'duplicate_test_files')
    return os.path.join(fixture_dir, file)
|
||
|
||
class DuplicateFileBearTest(unittest.TestCase):
    """
    Tests for ``DuplicateFileBear`` driven by the fixtures found in the
    ``duplicate_test_files`` directory.
    """

    def setUp(self):
        self.section = Section('name')
        self.queue = Queue()
        self.file_dict = {}
        self.test_files = ['complexFirst.txt', 'complexSecond.txt',
                           'noMatch.txt', 'smallFirst.txt',
                           'smallSecond.txt']

    def get_results(self, files_to_check):
        """
        Load the given fixtures into ``self.file_dict``, run the bear on
        them and return its results.

        :param files_to_check: Fixture file names to feed to the bear.
        :return:               List of everything the bear's ``run`` yields.
        """
        self.files = [get_absolute_test_path(file) for file in files_to_check]
        for filename in self.files:
            with open(filename, 'r', encoding='utf-8') as _file:
                self.file_dict[filename] = tuple(_file.readlines())
        self.maxDiff = None
        self.uut = DuplicateFileBear(self.file_dict, self.section,
                                     self.queue)
        return list(self.uut.run())

    def test_results_complete(self):
        results = self.get_results(self.test_files)
        messages = [result.message for result in results]
        combined = '\t'.join(messages)
        # Substring checks instead of comparing whitespace-split tokens, so
        # the test still passes when the checkout path contains spaces.
        for duplicate in ('complexSecond.txt', 'complexFirst.txt',
                          'smallFirst.txt', 'smallSecond.txt'):
            self.assertIn(get_absolute_test_path(duplicate), combined)
        self.assertEqual(results[0].severity, RESULT_SEVERITY.INFO)

    def test_results_no_duplicates(self):
        results = self.get_results([self.test_files[2],
                                    self.test_files[3]])
        messages = [result.message for result in results]
        self.assertEqual(messages, [])

    def test_results_empty(self):
        results = self.get_results([])
        messages = [result.message for result in results]
        self.assertEqual(messages, ['You did not add any file to compare'])
        self.assertEqual(results[0].severity, RESULT_SEVERITY.MAJOR)

    def test_result_single(self):
        results = self.get_results([self.test_files[0]])
        messages = [result.message for result in results]
        self.assertEqual(messages, ['You included only one file'])
        self.assertEqual(results[0].severity, RESULT_SEVERITY.MAJOR)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
4 r34l ch4ll3n63: 123 ÄÖü ABc @€¥ §&% {[( ←↓→↑ ĦŊħ ß°^ \\\n\u2192 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
4 r34l ch4ll3n63: 123 ÄÖü ABc @€¥ §&% {[( ←↓→↑ ĦŊħ ß°^ \\\n\u2192 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
I dont match with any other file! |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
This is a small file! |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
This is a small file! |