-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathHCRIS_downloader.py
129 lines (101 loc) · 3.95 KB
/
HCRIS_downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
from datetime import date
import os
import requests
import sys
reload(sys)
sys.setdefaultencoding('utf8')
import zipfile
# import mysql as m
from mysql import DbHcris
# Base URL that the yearly archive file names are appended to when downloading.
HOME_URL = "http://downloads.cms.gov/FILES/HCRIS/"

# Working directories sit next to this script: ZIP/ receives the downloaded
# archives and CSV/ receives their extracted contents.
_SCRIPT_DIR = os.path.dirname(__file__)
ZIP_PATH = os.path.abspath(os.path.join(_SCRIPT_DIR, 'ZIP/'))
CSV_PATH = os.path.abspath(os.path.join(_SCRIPT_DIR, 'CSV/'))

# Create both directories at import time so every later step can rely on them.
for _work_dir in (ZIP_PATH, CSV_PATH):
    if not os.path.exists(_work_dir):
        os.makedirs(_work_dir)
# Every command main() understands, in the order shown in the usage message.
_COMMANDS = (
    "delete_zip_and_csv",
    "get_hcris_data",
    "unzip",
    "create_clean_hcris_tables",
    "load_csvs_to_db",
    "add_procedures",
    "add_identifier",
)

# Commands that build a DbHcris from sys.argv[2] .. sys.argv[5].
_DB_COMMANDS = frozenset(_COMMANDS[3:])


def _usage():
    """Return the usage text shown when the command line is invalid."""
    # NOTE(review): the meaning of the four database arguments is defined by
    # DbHcris, which is not visible here; only the last is converted to int.
    return (
        'usage: {0} <command> [<db-arg> <db-arg> <db-arg> <db-int-arg>]\n'
        'commands: {1}\n'
        'database commands (four extra arguments required): {2}'
    ).format(sys.argv[0], ', '.join(_COMMANDS), ', '.join(sorted(_DB_COMMANDS)))


def main():
    """Run the pipeline step named by the first command-line argument.

    Supported commands:
      delete_zip_and_csv         empty the ZIP and CSV working directories
      get_hcris_data             download the HCRIS archives
      unzip                      extract the downloaded archives into CSV_PATH
      create_clean_hcris_tables  drop the old HCRIS tables and create new ones
      load_csvs_to_db            load the extracted CSV files into the database
      add_procedures             call DbHcris.add_stored_procedures()
      add_identifier             call DbHcris.add_identifier_to_RPT()

    Exits through sys.exit() with a usage message (non-zero status) when the
    command is missing or unknown, or when a database command is given fewer
    than four extra arguments. Previously these cases ended in a bare
    IndexError or were silently ignored.
    """
    if len(sys.argv) < 2:
        sys.exit(_usage())

    command = sys.argv[1]
    if command not in _COMMANDS:
        sys.exit('unknown command: {0}\n{1}'.format(command, _usage()))
    if command in _DB_COMMANDS and len(sys.argv) < 6:
        sys.exit('{0} needs four database arguments\n{1}'.format(command, _usage()))

    if command == "delete_zip_and_csv":
        # delete any data in the zip or csv dir
        delete_zip_and_csv_files()
    elif command == "get_hcris_data":
        # download hcris data
        get_hcris_data_from_cms_website()
    elif command == "unzip":
        # unzip the files just downloaded to the CSV path
        unzip_files_in_dir(ZIP_PATH, CSV_PATH)
    elif command == "create_clean_hcris_tables":
        # drop old hcris tables and create fresh ones
        create_clean_hcris_tables()
    elif command == "load_csvs_to_db":
        # load csv data into the mysql database
        load_csvs_to_db()
    elif command == "add_procedures":
        m = DbHcris(sys.argv[2], sys.argv[3], sys.argv[4], int(sys.argv[5]))
        m.add_stored_procedures()
    elif command == "add_identifier":
        m = DbHcris(sys.argv[2], sys.argv[3], sys.argv[4], int(sys.argv[5]))
        m.add_identifier_to_RPT()
def delete_zip_and_csv_files():
    """Empty both working directories so the next run starts fresh."""
    # Zip archives are cleared first, then the extracted CSV files.
    for work_dir in (ZIP_PATH, CSV_PATH):
        remove_files_in_dir(work_dir)
def get_hcris_data_from_cms_website(start_year=2012, timeout=60):
    """Download the yearly HOSP10 archives from HOME_URL.

    Requests ``HOSP10FY<year>.zip`` for every year from ``start_year`` up to,
    but not including, the current calendar year, and hands each successful
    response to download_file().

    Args:
        start_year: first year to request; defaults to 2012.
        timeout: seconds passed to requests.get() so a stalled server cannot
            hang the run indefinitely.
    """
    for year in range(start_year, date.today().year):
        archive = 'HOSP10FY{0}.zip'.format(year)
        # stream=True defers the body so download_file() can write it in chunks.
        response = requests.get(
            '{0}{1}'.format(HOME_URL, archive), stream=True, timeout=timeout
        )
        try:
            # make sure the link is valid before downloading
            if response.status_code == 200:
                download_file(response, archive)
                print('{0} Was downloaded'.format(archive))
            else:
                print('{0} skipped (HTTP {1})'.format(archive, response.status_code))
        finally:
            # A streamed response holds its connection until it is closed;
            # close it even when the body was never read (non-200 answers).
            response.close()
    # Alert everything is finished
    print('All finished downloading')
def download_file(r, local_filename, chunk_size=1024):
    """Write the body of a streamed response to a file inside ZIP_PATH.

    Args:
        r: response object exposing ``iter_content(chunk_size=...)``.
        local_filename: name of the file to create inside ZIP_PATH.
        chunk_size: number of bytes requested per chunk; defaults to 1024.

    Returns:
        ``local_filename``, unchanged.
    """
    target = os.path.join(ZIP_PATH, local_filename)
    # The with-block flushes and closes the file, so no per-chunk flush is needed.
    with open(target, 'wb') as out:
        for chunk in r.iter_content(chunk_size=chunk_size):
            if chunk:  # filter out keep-alive chunks
                out.write(chunk)
    return local_filename
def unzip_files_in_dir(source_dir, destination_dir):
    """Extract every zip archive found under source_dir into destination_dir.

    Walks source_dir recursively. Files whose name starts with a dot are
    ignored. Files that are not zip archives are reported and skipped, so
    one stray file no longer aborts the whole run.

    Args:
        source_dir: directory tree to search for archives.
        destination_dir: directory that receives the extracted members.
    """
    for subdir, _dirs, names in os.walk(source_dir):
        for name in names:
            if name.startswith('.'):
                continue
            archive_path = os.path.join(subdir, name)
            if not zipfile.is_zipfile(archive_path):
                print('skipped {0} (not a zip archive)'.format(name))
                continue
            # NOTE(review): extractall() trusts member names; the zipfile docs
            # advise against extracting archives from untrusted sources.
            with zipfile.ZipFile(archive_path, "r") as z:
                z.extractall(destination_dir)
            print('extracted {0}'.format(name))
def remove_files_in_dir(dir):
    """Delete the non-hidden files directly inside ``dir``.

    Entries whose name starts with a dot are kept. Real subdirectories are
    reported and left in place, because os.remove() cannot delete them and
    would otherwise abort the clean-up half-way through.

    Args:
        dir: path of the directory to empty.
    """
    for name in os.listdir(dir):
        if name.startswith('.'):
            continue
        path = os.path.join(dir, name)
        # Symlinks are still removed; only real directories are skipped.
        if os.path.isdir(path) and not os.path.islink(path):
            print('skipped {0} (directory)'.format(name))
            continue
        os.remove(path)
        print('deleted {0}'.format(name))
def create_clean_hcris_tables():
    """Drop the existing HCRIS tables and create them again.

    The DbHcris handle is built from command-line arguments 2 through 5;
    the last of them is converted to int.
    """
    ctor_args = (sys.argv[2], sys.argv[3], sys.argv[4], int(sys.argv[5]))
    db = DbHcris(*ctor_args)
    db.drop_hcris_tables()
    db.create_hcris_tables()
def load_csvs_to_db():
    """Load every non-hidden file in CSV_PATH through its DbHcris loader.

    The text after the last underscore of a file name, without its
    extension, selects the ``load_hcris_<suffix>`` method to call. Files are
    processed in reverse alphabetical order of that suffix; the original
    note says this is so the RPT table is loaded first.
    """
    db = DbHcris(sys.argv[2], sys.argv[3], sys.argv[4], int(sys.argv[5]))

    def table_suffix(filename):
        # Everything after the last underscore, extension included.
        return filename.rpartition('_')[2]

    visible = [name for name in os.listdir(CSV_PATH) if not name.startswith('.')]
    for name in sorted(visible, key=table_suffix, reverse=True):
        suffix = table_suffix(name)
        # Show which file suffix is about to be loaded.
        print(suffix)
        loader = getattr(db, 'load_hcris_{0}'.format(suffix.partition('.')[0]))
        loader(os.path.join(CSV_PATH, name))
# Dispatch on the command line only when run as a script, not when imported.
if __name__ == "__main__":
    main()