-
Notifications
You must be signed in to change notification settings - Fork 1
/
KMP.py
101 lines (95 loc) · 2.61 KB
/
KMP.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from database_interactor import readData
from parser2_v2 import Parser2
def transition_function(P):
'''
A utility function that creates a PI table needed to
avoid back-tracking on 'Text' array.
:param P:
:return a:
'''
m = len(P)
k=0
a=[0]*m
for i in range(1,m):
while k>0 and P[k]!=P[i]:
k=a[k-1]
if P[k]==P[i]:
k += 1
a[i]=k
return a
def KnuthMorrisPratt(T,P):
'''
The main algorithmic implementation of KMP Algorithm to search against
plagiarisation in given two texts. We return count of matched patterns.
:param T:
:param P:
:return count:
'''
k=0
m=len(P)
n=len(T)
flag=0
count = 0
a = transition_function(P)
for i in range(n):
while k>0 and T[i]!=P[k]:
k=a[k-1]
if T[i]==P[k]:
if k == m - 1:
#print("Shift found at ", i - m + 1)
count+=1
flag = 1
k = a[k]
else:
k+=1
return count
class engine:
def recieve_data(self):
'''
Collects data from database
:return None :
'''
p1 = readData("original.txt")
p1.parse_data()
self.data1 = p1.send_data()
p2 = readData("pattern.txt")
p2.parse_data()
self.data2 = p2.send_data()
def send_data(self):
'''
Sends the required data to whatever file asks for it
:return None:
'''
ob1 = Parser2(self.data1)
ob2 = Parser2(self.data2)
self.text = set(ob1.parsed_words())
self.patt = set(ob2.parsed_words())
self.total_words1 = len(self.text)
self.total_words2 = len(self.patt)
def process(self):
'''
A utility function that processes the obtained data and returns the percentage
of plagiarisation bw the original text and suspected text!
:return String type data:
'''
c = 0
for i in self.patt:
for j in self.text:
if len(i) == len(j):
c += KnuthMorrisPratt(j, i)
#print(i,j,c)
if not c:
print("No Plagiarism")
return str(0.00)
else:
print("count:",c)
plag_percent = (2 * c) /(self.total_words1+self.total_words2)
print("The article is %f %% plagiarised" %( plag_percent * 100))
return str(plag_percent * 100)[0:6]
'''
To instantiate the class, the following calls need to be made
ob = engine()
ob.recieve_data()
ob.send_data()
print(ob.process())
'''