-
Notifications
You must be signed in to change notification settings - Fork 5
/
emotionDatabase.py
44 lines (37 loc) · 1.54 KB
/
emotionDatabase.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os

import requests
from bs4 import BeautifulSoup
"""
Code used to generate and work with the database of emotion text files
(one .txt file of quotes per emotion).
"""
def create_emotion_database(moods):
    """Build one quotes file per mood.

    Walks through *moods* in order, prints a progress line for each one and
    then hands it to ``create_emotion_txt``, which produces that mood's file.
    """
    progress_template = "Generating file for {}."
    for current_mood in moods:
        print(progress_template.format(current_mood))
        create_emotion_txt(current_mood)
def _extract_quotes(page_html):
    """Return the text of every quote found in one Goodreads tag page.

    Each ``div`` with class ``quoteText`` yields one string: its text nodes
    are stripped and concatenated (no separator), skipping nodes that are
    just "―" or "," — the leftovers of the quote attribution once the
    ``span``/``a`` elements have been removed.
    """
    soup = BeautifulSoup(page_html, "html.parser")
    # Drop elements whose text is not part of the quote body.
    for unwanted in soup(["script", "style", "span", "a"]):
        unwanted.extract()

    quotes = []
    for quote_div in soup.find_all("div", attrs={"class": "quoteText"}):
        pieces = (node.strip() for node in quote_div.find_all(text=True))
        quotes.append("".join(p for p in pieces if p not in ("―", ",")))
    return quotes


def create_emotion_txt(emotion, max_pages=100, timeout=10):
    """Scrape Goodreads quotes tagged *emotion* into ``emotions/<emotion>.txt``.

    Pages are fetched in order starting at 1. Scraping stops at the first
    page that contains no quotes, at the first failed request, or after
    *max_pages* pages, whichever comes first. Quotes gathered before a
    failure are kept in the file. The file is UTF-8 encoded with one quote
    per line.

    Args:
        emotion: Goodreads tag name; also used as the output file's stem.
        max_pages: Upper bound on the number of pages requested.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        OSError: If the output directory or file cannot be created.
    """
    filename = "emotions/{}.txt".format(emotion)
    # Make sure the target directory exists so open() does not fail on a
    # fresh checkout.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    # newline="\n" disables platform newline translation, so the bytes on
    # disk match what writing pre-encoded UTF-8 in binary mode would produce.
    with open(filename, "w", encoding="utf-8", newline="\n") as f:
        for page in range(1, max_pages + 1):
            print(page)
            url = "https://www.goodreads.com/quotes/tag/{}?page={}".format(
                emotion, page)
            try:
                response = requests.get(url, timeout=timeout)
                response.raise_for_status()
            except requests.RequestException as err:
                # Best effort: keep what was scraped so far, but report the
                # failure instead of silently swallowing it.
                print("Stopping at page {} for {}: {}".format(
                    page, emotion, err))
                break

            quotes = _extract_quotes(response.text)
            if not quotes:
                # An empty page means the tag has no further quotes.
                break
            f.writelines(quote + "\n" for quote in quotes)