-
Notifications
You must be signed in to change notification settings - Fork 1
/
recursive_tgz_extract.py
96 lines (76 loc) · 3.01 KB
/
recursive_tgz_extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python
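## Recursively extracts .tgz, .tar.gz, .tar and .gz archives, including
## archives nested inside other archives.
## Copy the parent archive into a folder and run this script from that folder
## (or pass one or more folders as command-line arguments).
## WARNING: archives are removed after extraction, so back them up first.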
import os
import subprocess
import sys
## Extraction command for every supported archive suffix.
## Each '{}' is a positional placeholder: the first one receives the archive
## file name, the second one (when present) the destination folder.
archive_detail = {
    ## gzip-compressed tarballs, unpacked into a destination folder
    '.tgz': {'cmd': 'tar -zxf {} -C {}'},
    '.tar.gz': {'cmd': 'tar -zxf {} -C {}'},
    ## plain tarball, unpacked into a destination folder
    '.tar': {'cmd': 'tar -xf {} -C {}'},
    ## single gzip-compressed file, decompressed in place
    '.gz': {'cmd': 'gunzip {}'},
}
def _build_argv(template, *paths):
    """Turn a command template such as 'tar -zxf {} -C {}' into an argv list.

    The template is split on whitespace and every bare '{}' token is replaced,
    in order, by the next entry of *paths*.  Paths are inserted as whole
    arguments, so names containing spaces or shell metacharacters stay intact.
    """
    remaining = iter(paths)
    return [next(remaining) if token == '{}' else token
            for token in template.split()]


def _run_extract(extn, *paths):
    """Run the extraction command registered for *extn*; True on exit code 0."""
    template = archive_detail[extn]['cmd']
    ## No shell is involved: file names that come out of an archive are
    ## untrusted and must never be interpreted as shell syntax.
    status = subprocess.call(_build_argv(template, *paths))
    print (template.format(*paths))
    return status == 0


def _extract_here(filename, extn, new_path):
    """Extract one archive that lives in the current directory (*new_path*).

    '.gz' files are decompressed in place; every other type is unpacked into
    a folder named after the archive (suffix removed) and the archive is then
    deleted.  The result is handed back to recursive_archive().
    """
    folder_name = filename[:-len(extn)]
    if not folder_name:
        ## File is named exactly like the suffix (e.g. '.tgz'); there is no
        ## usable target name, and joining '' would recurse into new_path.
        return
    if extn == '.gz':
        ## gunzip cannot handle 0 byte files, so those are simply deleted
        if os.path.getsize(filename) == 0:
            os.unlink(filename)
            print ('# del 0 byte file ' + filename)
            return
        if not _run_extract(extn, filename):
            return
    else:
        reusable = os.path.isdir(folder_name) and not os.path.islink(folder_name)
        if not reusable:
            if os.path.lexists(folder_name):
                ## A file or link already occupies the target name
                print ('# skip ' + filename + ': cannot create ' + folder_name)
                return
            os.mkdir(folder_name)
            print ('mkdir ' + folder_name)
        if not _run_extract(extn, filename, folder_name):
            ## Keep the archive when extraction failed, to avoid data loss
            return
        os.unlink(filename)
        print ('rm ' + filename)
    recursive_archive(new_path, os.path.join(new_path, folder_name))


def recursive_archive(cwd_path, new_path):
    """Extract every archive found below *new_path*, including nested ones.

    The function changes into *new_path*, extracts each file whose name ends
    with a suffix listed in ``archive_detail``, recurses into every
    sub-directory (including the folders it just created) and finally changes
    back to *cwd_path*.

    cwd_path -- directory to return to once *new_path* has been processed
    new_path -- directory to process; anything that is not a real directory
                (regular file, symbolic link, missing path) is ignored

    Returns None.  Progress is printed as shell-like commands.  Errors from
    os.chmod / os.chdir / os.listdir propagate as OSError; the working
    directory is restored to *cwd_path* even in that case once it was changed.
    """
    ## Ignore normal files, soft links and paths that do not exist
    if os.path.islink(new_path) or not os.path.isdir(new_path):
        return
    ## sometimes chdir fails due to permission error
    ## NOTE(review): this leaves every visited directory world-writable
    os.chmod(new_path, 0o777)
    os.chdir(new_path)
    print ('cd ' + new_path)
    try:
        ## Entries are relative names; they resolve because we are inside new_path
        for filename in os.listdir('.'):
            if os.path.islink(filename):
                continue
            if os.path.isfile(filename):
                ## The longest matching suffix wins, so 'x.tar.gz' is handled
                ## as '.tar.gz' and never as '.gz'
                matches = [extn for extn in archive_detail
                           if filename.endswith(extn)]
                if matches:
                    _extract_here(filename, max(matches, key=len), new_path)
            elif os.path.isdir(filename):
                recursive_archive(new_path, os.path.join(new_path, filename))
    finally:
        os.chdir(cwd_path)
        print ('cd ' + cwd_path)
## Script entry point.
## Without arguments the current directory is processed; otherwise every
## command-line argument is treated as a directory to process.
if __name__ == "__main__":
    start_dir = os.getcwd()
    targets = sys.argv[1:] or ['.']
    exit_status = 0
    for target in targets:
        ## os.path.join keeps absolute arguments as they are and anchors
        ## relative ones at the directory the script was started from
        target_path = os.path.join(start_dir, target)
        if not os.path.isdir(target_path):
            ## Report and move on, so one bad argument does not stop the rest
            sys.stderr.write('skipping {}: not a directory\n'.format(target))
            exit_status = 1
            continue
        recursive_archive(start_dir, target_path)
        ## Make sure the next relative argument is resolved from start_dir
        os.chdir(start_dir)
    sys.exit(exit_status)