forked from dilawar/Scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgit_search_and_purge.py
executable file
·81 lines (71 loc) · 2.24 KB
/
git_search_and_purge.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/usr/bin/env python
import os
import subprocess
import sys
import string
import re
def getOutput(cmd):
    """Run ``cmd`` through the shell and return what it wrote to stdout.

    Args:
        cmd: Command line, handed to ``os.popen`` unchanged.

    Returns:
        The command's standard output as one string.
    """
    pipe = os.popen(cmd)
    try:
        return pipe.read()
    finally:
        # Close explicitly so the pipe is released right away instead of
        # whenever the file object happens to be garbage-collected; this
        # function is called once per revision, so leaks would add up.
        pipe.close()
# Module-level argument check. The size threshold is read positionally from
# argv[2] (the "-s" flag itself is never inspected) and stored in the global
# ``maxSize``; with fewer than three argv entries only a usage line is printed
# and ``maxSize`` stays undefined.
if len(sys.argv) < 3:
    # Call form of print: behaves the same on Python 2 and Python 3.
    print("usage: %s -s size_in_bytes -e regex" % sys.argv[0])
else:
    maxSize = int(sys.argv[2])
def search_files(size, pat):
    """Collect the paths of large blobs found in the history of HEAD.

    Every revision printed by ``git rev-list HEAD`` is listed with
    ``git ls-tree -rlz``; a path is reported when its blob is strictly
    larger than ``size`` bytes and its base name matches ``pat``.

    Args:
        size: Size threshold in bytes, as an int or a numeric string.
        pat: Regular expression tested with ``re.match`` (i.e. anchored at
            the start) against the base name of each path. A false value
            (``None`` or ``""``) accepts every file.

    Returns:
        A set of repository-relative paths, verbatim as git reports them.

    Raises:
        ValueError: If ``size`` cannot be converted to an integer.
    """
    threshold = int(size)
    matches = re.compile(pat).match if pat else None
    found = set()
    for revision in getOutput("git rev-list HEAD").split():
        # -z terminates records with NUL and stops git from C-quoting
        # unusual file names, so every path comes back unmodified.
        listing = getOutput("git ls-tree -rlz %s" % revision)
        for record in listing.split("\0"):
            # Record layout: "<mode> <type> <object> <size>\t<path>".
            # Splitting on the TAB keeps spaces inside the path intact.
            meta, tab, path = record.partition("\t")
            if not tab:
                continue
            fields = meta.split()
            if len(fields) != 4:
                continue
            blob_size = fields[3]
            # Entries that have no blob size (e.g. submodule commits) show
            # "-" in the size column; they can never exceed the threshold.
            if blob_size == "-" or int(blob_size) <= threshold:
                continue
            # Git always separates path components with "/".
            basename = path.rsplit("/", 1)[-1]
            if matches is None or matches(basename):
                found.add(path)
    return found
def purge_file(filename):
    """Rewrite all refs with ``git filter-branch`` so ``filename`` is dropped.

    Args:
        filename: Path to remove from the index of every rewritten commit.
            It is spliced into the ``--index-filter`` command, which
            ``git filter-branch`` evaluates as shell code, so shell-special
            characters (spaces, for instance) must already be escaped by
            the caller.
    """
    print("|- Purging file {0} ...".format(filename))
    # An argument list is executed directly, without an intermediate shell:
    # no newline patching of a multi-line string is needed and a quote in
    # ``filename`` can no longer terminate the outer command early.
    command = [
        "git", "filter-branch", "-f",
        "--index-filter",
        "git rm --cached --ignore-unmatch {0}".format(filename),
        "--prune-empty",
        "--tag-name-filter", "cat",
        "--", "--all",
    ]
    subprocess.call(command)
if __name__ == "__main__":
    # Arguments are positional: argv[2] is the size threshold and argv[4],
    # when present, the regex. The "-s" / "-e" flags are not inspected.
    usage = "Usage : git_search_and_purge.py -s size_in_bytes [-e regex]"
    if len(sys.argv) < 3 or len(sys.argv) > 5:
        print(usage)
        sys.exit(0)
    size = sys.argv[2]
    # Without an explicit regex every file above the threshold is selected.
    regex = sys.argv[4] if len(sys.argv) == 5 else ".*"
    files = search_files(size, regex)
    for found in files:
        # The name ends up inside a shell-evaluated filter command, so
        # spaces are backslash-escaped before it is handed over.
        purge_file(found.replace(" ", "\\ "))
    ### Purge the local references.
    # Drop filter-branch's backup refs, expire the reflog and prune, so the
    # rewritten-away objects are actually deleted from the local repository.
    subprocess.call("rm -rf ./.git/refs/original", shell=True)
    subprocess.call("git reflog expire --expire=now --all", shell=True)
    subprocess.call("git gc --prune=now", shell=True)
    # Overwrites origin/master with the rewritten history.
    subprocess.call("git push origin master --force", shell=True)