-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstartup.py
47 lines (38 loc) · 1.17 KB
/
startup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""Check for required packages, then download and unpack the Enron dataset.

Intended to be run as a script (``python startup.py``). The steps are:

1. Try to import nltk, numpy, scipy and sklearn, printing a reminder for
   each one that cannot be imported. A missing package does not stop the run.
2. Download the Enron email archive into the parent of the current
   working directory.
3. Change into that parent directory and extract the archive there.
"""

import importlib
import os
import tarfile
import urllib.request

REQUIRED_PACKAGES = ("nltk", "numpy", "scipy", "sklearn")
ENRON_URL = "https://www.cs.cmu.edu/~./enron/enron_mail_20150507.tar.gz"
ARCHIVE_NAME = "enron_mail_20150507.tar.gz"


def missing_packages(names=REQUIRED_PACKAGES):
    """Try to import every package in *names* and report the ones that fail.

    A "checking for ..." line is printed for each name, followed by an
    install reminder when the import raises ImportError.

    Args:
        names: Iterable of importable module names to probe.

    Returns:
        List of the names that could not be imported, in input order.
    """
    missing = []
    for name in names:
        print(f"checking for {name}")
        try:
            importlib.import_module(name)
        except ImportError:
            # Only a failed import counts as "not installed"; anything else
            # (e.g. Ctrl-C) must propagate instead of being swallowed.
            print(f"you should install {name} before continuing")
            missing.append(name)
    return missing


def download_dataset(url=ENRON_URL, archive_name=ARCHIVE_NAME):
    """Download *url* to ``../<archive_name>`` relative to the working directory.

    Args:
        url: Location of the gzipped tar archive.
        archive_name: File name to store the archive under in the parent
            directory.
    """
    print("downloading the Enron dataset (this may take a while)")
    print("to check on progress, you can cd up one level, then execute <ls -lthr>")
    print("Enron dataset should be last item on the list, along with its current size")
    print("download will complete at about 423 MB")
    urllib.request.urlretrieve(url, filename=f"../{archive_name}")
    print("download complete!")


def extract_dataset(archive_name=ARCHIVE_NAME):
    """Change into the parent directory and unpack *archive_name* there.

    The working directory is left at the parent directory afterwards.

    Args:
        archive_name: File name of the gzipped tar archive, expected to be
            located in the parent of the current working directory.
    """
    print("unzipping Enron dataset (this may take a while)")
    os.chdir("..")
    # The context manager closes the archive even if extraction fails.
    with tarfile.open(archive_name, "r:gz") as archive:
        if hasattr(tarfile, "data_filter"):
            # The archive comes from the network: where the interpreter
            # supports extraction filters, refuse members that would
            # escape the destination directory.
            archive.extractall(".", filter="data")
        else:
            archive.extractall(".")


def main():
    """Run the dependency check, the download and the extraction in order."""
    print()
    missing_packages()
    print()
    download_dataset()
    print()
    extract_dataset()
    print("you're ready to go!")


if __name__ == "__main__":
    main()