-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.py
131 lines (100 loc) · 3.24 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import requests
from bs4 import BeautifulSoup
import re
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from datetime import datetime
# SMTP connection and addressing settings. These are empty placeholders:
# checkConfig() overwrites them with the values read from the "config" file,
# and sendEmail() reads them when connecting and building the message.
SMTP_SERVER = ""  # host passed to smtplib.SMTP
SMTP_PORT = 0  # port passed to smtplib.SMTP (checkConfig stores an int)
EMAIL_ADDRESS = ""  # SMTP login name, also used as the "From" header
PASSWORD = ""  # SMTP login password
EMAIL_TO = ""  # address used as the "To" header
def checkWebPage():
    """Report whether the jobs listing differs from the saved template.

    Downloads the SU jobs page, extracts the element whose id is
    ``web-scraper``, pretty-prints it and compares the result with the
    contents of ``templatenojobs.html``. When the two differ, the freshly
    scraped markup is written to ``newpage.html``.

    Returns:
        True if the scraped section differs from the template, False
        otherwise.

    Raises:
        requests.RequestException: the page could not be downloaded, the
            request timed out, or the server answered with an error status.
        RuntimeError: the page contains no element with id ``web-scraper``.
        OSError: the template could not be read or ``newpage.html`` could
            not be written.
    """
    url = "https://www.su.nottingham.ac.uk/jobs/"

    # requests never times out by default; bound the wait so an unresponsive
    # server cannot hang the script forever.
    page = requests.get(url, timeout=30)
    # Do not compare an HTTP error page against the template.
    page.raise_for_status()

    soup = BeautifulSoup(page.content, 'html.parser')
    section = soup.find(id='web-scraper')
    if section is None:
        # find() returns None when nothing matches; fail with a clear message
        # instead of an AttributeError on the prettify() call below.
        raise RuntimeError("No element with id 'web-scraper' found at " + url)
    results = section.prettify()

    with open("templatenojobs.html", "r") as template_file:
        template = template_file.read()

    if results == template:
        return False

    print("Page has changed")
    # Keep a copy of the changed section so it can be inspected later.
    with open("newpage.html", "w+") as new_page_file:
        new_page_file.write(results)
    return True
def createConfig():
    """Write a blank ``config`` template into the current directory.

    The template holds the five keys the script expects, each with an empty
    value, for the user to fill in. Any existing ``config`` file is
    overwritten.
    """
    print("\tCreating config file")
    print("\tPlease fill in your email login details")
    # The context manager closes the handle even if the write fails.
    with open("config", "w") as config_file:
        config_file.write("SMTP_SERVER=\nSMTP_PORT=\nEMAIL_ADDRESS_FROM=\nPASSWORD=\nEMAIL_ADDRESS_TO=")
def checkConfig():
    """Load the SMTP settings from the ``config`` file into module globals.

    The file must hold five ``KEY=value`` lines in this order:
    ``SMTP_SERVER``, ``SMTP_PORT``, ``EMAIL_ADDRESS_FROM``, ``PASSWORD`` and
    ``EMAIL_ADDRESS_TO``. On success the values are stored in the globals
    ``SMTP_SERVER``, ``SMTP_PORT`` (converted to ``int``), ``EMAIL_ADDRESS``,
    ``PASSWORD`` and ``EMAIL_TO``.

    Raises:
        SystemExit: with status 1 when the file does not exist, when any of
            the five lines is missing or has an empty value (in both cases
            ``createConfig`` writes a blank template first), or when the
            port value is not an integer.
        OSError: the file exists but could not be read.
    """
    global SMTP_SERVER
    global SMTP_PORT
    global EMAIL_ADDRESS
    global PASSWORD
    global EMAIL_TO

    # Keys in the order their lines must appear in the file.
    keys = (
        "SMTP_SERVER",
        "SMTP_PORT",
        "EMAIL_ADDRESS_FROM",
        "PASSWORD",
        "EMAIL_ADDRESS_TO",
    )

    try:
        # readline() returns "" at end of file, so a short file simply
        # produces lines that fail to match below.
        with open("config", "r") as config_file:
            lines = [config_file.readline() for _ in keys]
    except FileNotFoundError:
        print("\tConfig file does not exist")
        createConfig()
        raise SystemExit(1) from None

    # "(.+)" requires a non-empty value; "$" matches before the trailing
    # newline that readline() keeps, so the captured value excludes it.
    matches = [
        re.search(r"^" + key + r"=(.+)$", line)
        for key, line in zip(keys, lines)
    ]
    if any(match is None for match in matches):
        print("\tConfig file not filled in")
        # NOTE: createConfig() opens "config" in write mode, so this replaces
        # whatever the file currently contains with the blank template.
        createConfig()
        raise SystemExit(1)

    server, port, email_from, password, email_to = (
        match.group(1) for match in matches
    )

    try:
        port_number = int(port)
    except ValueError:
        # Report a bad port like every other config problem instead of
        # letting a raw ValueError traceback escape.
        print("\tSMTP_PORT must be a number")
        raise SystemExit(1) from None

    SMTP_SERVER = server
    SMTP_PORT = port_number
    EMAIL_ADDRESS = email_from
    PASSWORD = password
    EMAIL_TO = email_to
def sendEmail():
    """Email a notification that the SU jobs page has changed.

    Builds an HTML message linking to the jobs page and sends it from
    ``EMAIL_ADDRESS`` to ``EMAIL_TO`` through ``SMTP_SERVER``:``SMTP_PORT``,
    upgrading the connection with STARTTLS and logging in with
    ``EMAIL_ADDRESS`` / ``PASSWORD``.

    Raises:
        smtplib.SMTPException: the server rejected STARTTLS, the login or
            the message.
        OSError: the connection to the server could not be established.
    """
    message = "<p>UoN SU Jobs page has changed: <a href=\"https://www.su.nottingham.ac.uk/jobs/\">Visit Page</a></p>"

    # Build the message before connecting so the connection is only held
    # open for the actual send.
    msg = MIMEMultipart()
    msg['From'] = EMAIL_ADDRESS
    msg['To'] = EMAIL_TO
    msg['Subject'] = "UoN SU Jobs site changed"
    msg.attach(MIMEText(message, 'html'))

    # The context manager issues QUIT and closes the socket even when
    # starttls(), login() or send_message() raises.
    with smtplib.SMTP(host=SMTP_SERVER, port=SMTP_PORT) as s:
        s.starttls()
        s.login(EMAIL_ADDRESS, PASSWORD)
        s.send_message(msg)
if __name__ == "__main__":
    # Load the SMTP settings first; checkConfig() exits the process when the
    # config file is missing or incomplete.
    checkConfig()

    # Only send a notification when the scraped section differs from the
    # saved template.
    if checkWebPage():
        sendEmail()
        print("Sent email")
    else:
        print("Site not changed")

    # Append the time of day of this run to the run log. The context manager
    # closes the file even if the write fails.
    with open("timerun.txt", "a+") as run_log:
        run_log.write(str(datetime.now().time()) + "\n")

    print("End of program")