-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.py
31 lines (27 loc) · 1.06 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import requests
from bs4 import BeautifulSoup
import pprint
def sort_stories(hnlist):
    """Return the stories ordered from most to fewest votes.

    Args:
        hnlist: Iterable of dicts, each holding an integer under the
            ``'votes'`` key.

    Returns:
        A new list with the same dicts sorted by ``'votes'`` in descending
        order. The input is not modified, and stories with equal vote
        counts keep their original relative order.

    Raises:
        KeyError: If any story has no ``'votes'`` key.
    """
    return sorted(hnlist, key=lambda story: story['votes'], reverse=True)
def create_custom_hn(links, subtext, min_votes=100):
    """Build a vote-ranked list of stories that reach a minimum score.

    Args:
        links: Title link elements; each must provide ``getText()`` and
            ``get('href', None)``.
        subtext: Elements paired positionally with ``links``; each must
            provide ``select('.score')``.
        min_votes: Smallest vote count a story needs to be kept. The
            default of 100 matches the previous hard-coded ``> 99`` check.

    Returns:
        A list of ``{'title', 'link', 'votes'}`` dicts ordered by
        ``sort_stories``.

    NOTE(review): pairing is purely positional; this assumes the page
    yields exactly one subtext element per title link -- confirm against
    the caller's selectors.
    """
    hn = []
    # zip stops at the shorter sequence, so a length mismatch between the
    # two selections can no longer raise IndexError.
    for link, details in zip(links, subtext):
        score = details.select('.score')
        if not score:
            # Rows without a score element carry no vote count.
            continue
        # Take the leading number only: stripping the literal ' points'
        # left "1 point" intact and made int() raise ValueError.
        tokens = score[0].getText().split()
        if not tokens:
            continue
        points = int(tokens[0])
        if points >= min_votes:
            hn.append({
                'title': link.getText(),
                'link': link.get('href', None),
                'votes': points,
            })
    return sort_stories(hn)
# requests waits indefinitely unless a timeout is given, so bound every call.
_REQUEST_TIMEOUT_SECONDS = 10


def _fetch_ranked_stories(url):
    """Download one listing page and return its filtered, ranked stories.

    Args:
        url: Address of the listing page to scrape.

    Returns:
        The list produced by ``create_custom_hn`` for that page.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            an error page is never parsed as if it were a story listing.
    """
    response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    page = BeautifulSoup(response.text, 'html.parser')
    # NOTE(review): these class names depend on the site's markup; confirm
    # that they still match the live page.
    return create_custom_hn(page.select('.storylink'), page.select('.subtext'))


# Print the first two listing pages, separated by a block of blank lines.
pprint.pprint(_fetch_ranked_stories('https://news.ycombinator.com/'))
print("\n\n\n\n\n\n")
pprint.pprint(_fetch_ranked_stories('https://news.ycombinator.com/news?p=2'))