scrape-projections.py

import time
import urllib.request

from bs4 import BeautifulSoup


def fetch_projections_page(week, position_id):
    """Fetch the raw HTML of the Fantasy Sharks projections page for one week/position."""
    assert 1 <= week <= 17, f'Invalid week: {week}'
    base_url = 'https://www.fantasysharks.com/apps/bert/forecasts/projections.php'
    # Segment 596 corresponds to week 1, so week N maps to segment 595 + N.
    url = f'{base_url}?League=-1&Position={position_id}&scoring=1&Segment={595 + week}&uid=4'
    request = urllib.request.Request(url)
    # identify the scraper honestly rather than spoofing a browser
    request.add_header('User-Agent', 'projection-scraper 0.1')
    with urllib.request.urlopen(request) as response:
        return response.read()


def scrape_projections():
    """Yield one dictionary of projected stats per player, position, and week."""
    # Fantasy Sharks' numeric identifiers for each position's projections table.
    position_map = {'RB': 2, 'WR': 4, 'TE': 5, 'QB': 1, 'D': 6, 'K': 7}
    for week in range(1, 17):  # weeks 1 through 16
        for position, position_id in position_map.items():
            time.sleep(5)  # be polite
            html = fetch_projections_page(week, position_id)
            soup = BeautifulSoup(html, 'lxml')

            table = soup.find('table', id='toolData')
            header_row = table.find('tr')
            column_names = [th.text for th in header_row.find_all('th')]

            for row in table.find_all('tr'):
                column_entries = [td.text for td in row.find_all('td')]
                # exclude repeated header rows and the "Tier N" rows
                if len(column_entries) != len(column_names):
                    continue

                # extract Fantasy Sharks' player id
                player_link = row.find('a')
                player_id = int(player_link['href'].split('=')[-1].strip())

                # yield a dictionary of this player's weekly projection
                player = {'id': player_id, 'week': week, 'position': position}
                for key, entry in zip(column_names, column_entries):
                    player[key.lower()] = entry
                yield player
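

if __name__ == '__main__':
    # A minimal usage sketch, not part of the original script: drain the
    # generator and dump every projection row to JSON. The 'projections.json'
    # filename is an arbitrary choice for illustration.
    import json

    projections = list(scrape_projections())
    with open('projections.json', 'w') as f:
        json.dump(projections, f, indent=2)
    print(f'Scraped {len(projections)} projection rows.')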