-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.py
52 lines (33 loc) · 1.04 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import urllib.request
from bs4 import BeautifulSoup
import csv
# Page fetched and parsed by main().
burl = "https://www.bbc.com/news/science_and_environment"
def get_html(url):
    """Fetch *url* and return the raw response body.

    The response is opened in a ``with`` block so that it is closed
    even when ``read()`` raises, instead of being left open until
    garbage collection.

    Args:
        url: Address to fetch; passed unchanged to
            ``urllib.request.urlopen``.

    Returns:
        The undecoded body as returned by ``response.read()``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the
            request fails.
    """
    with urllib.request.urlopen(url) as response:
        return response.read()
def parse(html):
    """Extract headline texts from the promo block found in *html*.

    The first ``<div>`` whose ``class`` attribute equals the promo-body
    class string below is located; every ``<div>`` nested inside it is
    then inspected and the text of the ``<h3>`` inside its first ``<a>``
    is collected.

    Args:
        html: Markup to parse (anything ``BeautifulSoup`` accepts).

    Returns:
        A list of ``{'Head': <headline text>}`` dicts, one per nested
        ``<div>`` that contains an ``<a>`` with an ``<h3>``. The list is
        empty when the promo block is not present in the markup.
    """
    soup = BeautifulSoup(html, 'html.parser')
    # A multi-word class_ string is matched against the exact value of
    # the class attribute, so this only hits the one promo-body variant.
    div = soup.find('div', class_='gs-c-promo-body gs-u-mt@xxs gs-u-mt@m gs-c-promo-body--primary gs-u-mt@xs gs-u-mt@s gs-u-mt@m gs-u-mt@xl gel-1/3@m gel-1/2@xl gel-1/1@xxl')
    if div is None:
        # Nothing matched: return no headlines rather than failing with
        # an AttributeError on None.
        return []

    projects = []
    for mainn in div.find_all('div'):
        # Look inside the current nested div. Searching the outer
        # container here would append the same first headline once per
        # nested div.
        other = mainn.find('a')
        if other is None or other.h3 is None:
            # No link, or a link without a heading: nothing to record.
            continue
        projects.append({
            'Head': other.h3.text
        })

    # Echo the collected entries to stdout.
    for project in projects:
        print(project)
    return projects
def save(projects, path):
    """Write the headlines in *projects* to a one-column CSV file.

    Args:
        projects: Iterable of mappings, each with a ``'Head'`` key
            holding the headline text.
        path: Destination file; created or overwritten.

    Raises:
        KeyError: If an entry has no ``'Head'`` key.
        OSError: If *path* cannot be opened for writing.
    """
    # newline='' lets the csv writer control line endings itself;
    # without it, platforms that translate '\n' produce blank rows.
    # An explicit UTF-8 encoding keeps non-ASCII headlines from
    # depending on (or failing under) the locale's default encoding.
    with open(path, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(('Name - ',))
        writer.writerows(
            (project['Head'],) for project in projects
        )
def main():
    """Fetch the page at ``burl``, parse it and save the result to 'pars.csv'."""
    page = get_html(burl)
    # Copy the parsed entries into a fresh list before saving.
    projects = list(parse(page))
    save(projects, 'pars.csv')
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
# First parser: scrapes BBC News.