Skip to content

Commit

Permalink
Patch hang on book retrieve
Browse files: browse the repository at this point in the history
  • Loading branch information
kashav committed Apr 29, 2016
1 parent 7383412 commit 84cadfc
Showing 1 changed file with 21 additions and 7 deletions.
28 changes: 21 additions & 7 deletions uoftscrapers/scrapers/textbooks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,9 @@ def scrape(location='.'):
def retrieve_terms():
html = Scraper.get('%s/buy_courselisting.asp' % Textbooks.host)

if html is None:
return []

listing = BeautifulSoup(html, "html.parser")
terms = listing.find(id='fTerm').find_all('option')[1:]

Expand Down Expand Up @@ -137,7 +140,10 @@ def retrieve_departments(terms):
}

xml = Scraper.get('%s/textbooks_xml.asp' % Textbooks.host,
params=payload, headers=headers)
params=payload, headers=headers, max_attempts=3)

if xml is None:
continue

departments = BeautifulSoup(xml, "xml").find_all('department')
for department in departments:
Expand Down Expand Up @@ -168,7 +174,10 @@ def retrieve_courses(department):
}

xml = Scraper.get('%s/textbooks_xml.asp' % Textbooks.host,
params=payload, headers=headers)
params=payload, headers=headers, max_attempts=3)

if xml is None:
return []

courses = BeautifulSoup(xml, "xml").find_all('course')
for course in courses:
Expand Down Expand Up @@ -196,7 +205,10 @@ def retrieve_sections(course):
}

xml = Scraper.get('%s/textbooks_xml.asp' % Textbooks.host,
params=payload, headers=headers)
params=payload, headers=headers, max_attempts=3)

if xml is None:
return []

sections = BeautifulSoup(xml, "xml").find_all('section')
for section in sections:
Expand All @@ -223,14 +235,16 @@ def retrieve_books(section):
'Referer': '%s/buy_courselisting.asp' % Textbooks.host
}

xml = Scraper.get('%s/textbooks_xml.asp' % Textbooks.host,
params=payload, headers=headers)
html = Scraper.get('%s/textbooks_xml.asp' % Textbooks.host,
params=payload, headers=headers, max_attempts=3)

if html is None:
return []

soup = BeautifulSoup(xml, "html.parser")
soup = BeautifulSoup(html, "html.parser")
books = soup.find_all('tr', {'class': 'book'})

if books == None:
done += 1
return []

for book in books:
Expand Down

0 comments on commit 84cadfc

Please sign in to comment.