directory_feed_constructor.py
"""
Classname: DirectoryFeedConstructor
Description: Creates a feed for an internal directory-type feed
Authored by: MapLarge, Inc. (Scott Rowles)
Change Log:
"""
"""
Define all the imports for the directory_feed_constructor module
"""
import os
import re
import sys
import datetime as dt
import codecs
import uuid
import shutil
from feed_constructor import FeedConstructor
class DirectoryFeedConstructor(FeedConstructor):
"""
Define the class properties
"""
depth_limit = None
storage_root = None
storage_subdirectory = None
storage_directory = None
web_root = None
    web_subdirectory = None
web_directory = None
start_directory = None
search_filter = None
def __init__(self, **config_json):
"""
Initialize the super class and load the class properties from the config json
"""
super(DirectoryFeedConstructor, self).__init__(**config_json)
self.storage_root = self.config_json['storage_root']
self.storage_subdirectory = self.config_json['storage_subdirectory']
self.storage_directory = os.path.join(self.storage_root, self.storage_subdirectory)
self.start_directory = self.config_json['start_directory']
self.web_root = self.config_json['web_root']
self.web_subdirectory = self.config_json['web_subdirectory']
self.web_directory = self.web_root + "/" + self.web_subdirectory
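        # A 'directory' feed walks a tree with a configurable depth limit and a
        # regex filename filter; any other feed_type looks for a single named
        # file (the 'file' config value) with depth_limit fixed at 1.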
if self.config_json['feed_type'] == 'directory':
self.depth_limit = self.config_json['depth_limit']
self.search_filter = self.config_json['search_filter']
else:
self.depth_limit = 1
self.search_filter = self.config_json['file']
def walk_dir(self):
"""
Walk the directory with a given depth_limit
"""
path = os.path.normpath(self.start_directory)
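        # Overall flow: make sure the staging directory exists, walk
        # start_directory up to depth_limit levels deep, copy each matching
        # file into storage_directory, and write one
        # "<source path>, <public url>, <timestamp>" line per file to
        # self.storage_file (assumed to be provided by the FeedConstructor
        # base class).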
        # If the local subdirectory for storage does not exist, create it
try:
os.makedirs(self.storage_directory)
except OSError:
if os.path.exists(self.storage_directory):
pass
else:
raise
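        # Pre-compile the filename filter; search_filter is treated as a
        # regular expression and matched against each file name below.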
fpattern = re.compile(self.search_filter) if self.search_filter else None
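        # A single UTC ISO-8601 timestamp is applied to every record written
        # during this run.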
adate = dt.datetime.utcnow().isoformat()
with codecs.open(self.storage_file, 'w', "utf-8") as ff:
for root,dirs,files in os.walk(self.start_directory, topdown=True):
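                # Depth is measured relative to the normalized start path;
                # emptying dirs[] below stops os.walk (topdown) from descending
                # any further once depth_limit is reached.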
depth = root[len(path) + len(os.path.sep):].count(os.path.sep)
if self.depth_limit and depth == self.depth_limit:
dirs[:] = [] # Don't recurse any deeper
else:
for afile in files:
if fpattern and fpattern.match(afile):
                            # Copy the file into the staging area and record it in the feed file
fpath_orig = os.path.join(root, afile)
fpath_stage = os.path.join(self.storage_directory, afile)
try:
shutil.copy(fpath_orig, fpath_stage)
url = self.web_directory + "/" + afile
                                ff.write('{0}, {1}, {2}\n'.format(
                                    fpath_orig, url, adate))
                            except IOError:
                                # Skip files that cannot be copied or recorded
                                continue
                            except Exception as e:
                                print(e)
def construct_feed(self):
"""
Execute the walk_dir method
"""
self.walk_dir()
def provide_params(self):
"""
        Provide parameters needed to save the feed to the database
"""
params = dict()
params['title'] = self.title
params['root_dir'] = self.start_directory
params['items_url'] = self.storage_file
        params['feed_uuid'] = uuid.uuid4()
params['pub_time'] = dt.datetime.now()
params['mod_time'] = dt.datetime.now()
return params
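
# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only). The config keys below mirror the
# ones read in __init__ above; the paths, URL, and 'title' value are
# hypothetical, and any additional keys required by the FeedConstructor base
# class (e.g. whatever populates self.storage_file) are assumptions.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    example_config = {
        'feed_type': 'directory',
        'title': 'Example directory feed',        # assumed to be read by the base class
        'storage_root': '/var/feeds',              # hypothetical path
        'storage_subdirectory': 'directory_feed',
        'start_directory': '/data/incoming',       # hypothetical path
        'web_root': 'http://example.com/feeds',    # hypothetical URL
        'web_subdirectory': 'directory_feed',
        'depth_limit': 2,
        'search_filter': r'.*\.csv$',
    }
    feed = DirectoryFeedConstructor(**example_config)
    feed.construct_feed()
    print(feed.provide_params())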