diff --git a/uoftscrapers/scrapers/buildings/__init__.py b/uoftscrapers/scrapers/buildings/__init__.py index ce93c03..b5716fe 100644 --- a/uoftscrapers/scrapers/buildings/__init__.py +++ b/uoftscrapers/scrapers/buildings/__init__.py @@ -2,7 +2,6 @@ from bs4 import BeautifulSoup from collections import OrderedDict from decimal import * -import json import os import re @@ -83,13 +82,12 @@ def get_map_json(campus): Scraper.get(Buildings.host) headers = {'Referer': Buildings.host} - html = Scraper.get('%s%s%s' % ( + data = Scraper.get('%s%s%s' % ( Buildings.host, 'data/map/', campus - ), headers=headers) + ), headers=headers, json=True) - data = json.loads(html) return data @staticmethod @@ -99,11 +97,10 @@ def get_regions_json(campus): Scraper.get(Buildings.host) headers = {'Referer': Buildings.host} - html = Scraper.get('%s%s%s' % ( + data = Scraper.get('%s%s%s' % ( Buildings.host, 'data/regions/', campus - ), headers=headers) + ), headers=headers, json=True) - data = json.loads(html) return data diff --git a/uoftscrapers/scrapers/exams/utsg.py b/uoftscrapers/scrapers/exams/utsg.py index 11c7966..2ba0fe0 100644 --- a/uoftscrapers/scrapers/exams/utsg.py +++ b/uoftscrapers/scrapers/exams/utsg.py @@ -28,10 +28,15 @@ def scrape(location='.', year=None): 'Referer': UTSGExams.host } html = Scraper.get('%s%s' % (UTSGExams.host, p), - headers=headers) - soup = BeautifulSoup(html, 'html.parser') + headers=headers, + max_attempts=3) - if not soup.find('table', class_='vertical listing'): + try: + soup = BeautifulSoup(html, 'html.parser') + except TypeError: + soup = None + + if not (html and soup and soup.find(class_='vertical listing')): # no exam data available Scraper.logger.info('No %s exams.' % p.upper()) continue diff --git a/uoftscrapers/scrapers/utils/layers.py b/uoftscrapers/scrapers/utils/layers.py index cb70b66..289a840 100644 --- a/uoftscrapers/scrapers/utils/layers.py +++ b/uoftscrapers/scrapers/utils/layers.py @@ -10,7 +10,6 @@ class LayersScraper: """ host = 'http://map.utoronto.ca/' - s = requests.Session() @staticmethod def get_layers_json(campus): @@ -18,16 +17,13 @@ def get_layers_json(campus): Scraper.logger.info('Retrieving map layers for %s.' % campus.upper()) - headers = { - 'Referer': LayersScraper.host - } - html = LayersScraper.s.get('%s%s%s' % ( + headers = {'Referer': LayersScraper.host} + data = Scraper.get('%s%s%s' % ( LayersScraper.host, 'data/map/', campus - ), headers=headers).text + ), headers=headers, json=True) - data = json.loads(html) return data['layers'] @staticmethod diff --git a/uoftscrapers/scrapers/utils/scraper.py b/uoftscrapers/scrapers/utils/scraper.py index bc1981a..64e5bda 100644 --- a/uoftscrapers/scrapers/utils/scraper.py +++ b/uoftscrapers/scrapers/utils/scraper.py @@ -41,6 +41,7 @@ def get(url, params=None, cookies=None, headers=None, json=False, max_attempts=1 doc = r else: sleep(0.5) + attempts += 1 except (requests.exceptions.Timeout, requests.exceptions.ConnectionError): attempts += 1