-
Notifications
You must be signed in to change notification settings - Fork 1
/
find_the_same_files.py
51 lines (45 loc) · 1.37 KB
/
find_the_same_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import os
import hashlib
def walk(path):
    """Recursively yield ``(path, dirs, files)`` for *path* and all directories below it.

    A simplified, top-down counterpart of ``os.walk``.  Each yielded tuple
    contains the directory being listed, the names of its sub-directories and
    the names of its regular files.  The names are bare entries as returned by
    ``os.listdir`` (not joined with *path*).

    If *path* does not exist, or exists but is not a directory, a message is
    printed and nothing is yielded.
    """
    isdir, isfile, join = os.path.isdir, os.path.isfile, os.path.join

    if not os.path.exists(path):
        # A single parenthesised argument prints identically whether `print`
        # is the Python 2 statement or the Python 3 function.
        print("%s: No such file or directory" % path)
        return
    if not isdir(path):
        # os.listdir() raises on a regular file; report it instead of crashing.
        print("%s: Not a directory" % path)
        return

    entries = os.listdir(path)
    dirs = [name for name in entries if isdir(join(path, name))]
    files = [name for name in entries if isfile(join(path, name))]
    yield (path, dirs, files)

    # NOTE: os.path.isdir follows symbolic links, so symlinked directories
    # are descended into as well.
    for subdir in dirs:
        # Distinct names here: the nested loop must not rebind `subdir`.
        for entry in walk(join(path, subdir)):
            yield entry
def md5sum(f):
    """Return the hexadecimal MD5 digest of the file at path *f*.

    The file is read in 4 KiB chunks so its whole content is never held in
    memory at once.

    Raises whatever ``open`` / ``read`` raise (e.g. ``IOError`` / ``OSError``)
    when the file cannot be opened or read.
    """
    digest = hashlib.md5()
    # Binary mode: hashing must see the raw bytes.  Text mode would decode the
    # data (breaking hashlib on Python 3) or translate newlines on Windows.
    # The `with` block also closes the file if a read fails part-way.
    with open(f, "rb") as fd:
        # iter(callable, sentinel) keeps reading until read() returns b"" (EOF).
        for chunk in iter(lambda: fd.read(1024 * 4), b""):
            digest.update(chunk)
    return digest.hexdigest()
def issame(a, b):
    """Compare files *a* and *b* by MD5 digest and report a match.

    When the digests are equal a message naming *a* is printed.

    Returns True if the two digests match, False otherwise.
    """
    same = md5sum(a) == md5sum(b)
    if same:
        # Print `a` as given: this function has no directory variable to join
        # it with, so referencing one here would raise NameError.
        print('%s is the same file' % a)
    return same
def main():
    """Report files with duplicate content under the directory in ``sys.argv[1]``.

    Every file found by ``walk`` is hashed with ``md5sum``.  The first file
    seen with a given digest is remembered silently; each later file with the
    same digest is reported on standard output.

    Exits with a usage message when no directory argument is supplied.
    """
    if len(sys.argv) < 2:
        # sys.exit(str) writes the message to stderr and exits with status 1.
        sys.exit("Usage: %s DIRECTORY" % sys.argv[0])

    # A set gives O(1) membership tests; a list would make the scan quadratic
    # in the number of distinct files.
    seen_digests = set()
    for path, _dirs, files in walk(sys.argv[1]):
        for name in files:
            full_path = os.path.join(path, name)
            digest = md5sum(full_path)
            if digest in seen_digests:
                # Single-argument print(...) works on both Python 2 and 3.
                print("This file has same md5: %s" % full_path)
            else:
                seen_digests.add(digest)
# Script entry point: start the duplicate scan only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()