Skip to content
This repository has been archived by the owner on May 23, 2018. It is now read-only.

Commit

Permalink
Do not try to read the page source when PhantomJS crashes
Browse files: browse the repository at this point in the history
  • Loading branch information
JeffAMcGee committed Dec 31, 2013
1 parent 3e5d392 commit 33a884b
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
3 changes: 0 additions & 3 deletions scrapy_webdriver/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,6 @@ def _download_request(self, request, spider):
spider.log('FAIL: ' + str(request.manager._webdriver), level=log.DEBUG)
signal.alarm(0)

# set page_source to blank so that WebdriverResponse doesn't complain
exception.page_source = '<html><head></head><body></body></html>'

# log a nice error message
msg = 'Error while downloading %s with webdriver (%s)' % \
(request.url, exception)
Expand Down
7 changes: 6 additions & 1 deletion scrapy_webdriver/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ def __str__(self):
class WebdriverResponse(TextResponse):
"""A Response that will feed the webdriver page into its body."""
def __init__(self, url, webdriver, exception=None, **kwargs):
kwargs.setdefault('body', webdriver.page_source)
# If the response resulted in an exception, the body may not exist
if exception:
page_source = '<html><head></head><body></body></html>'
else:
page_source = webdriver.page_source
kwargs.setdefault('body', page_source)
kwargs.setdefault('encoding', 'utf-8')
super(WebdriverResponse, self).__init__(url, **kwargs)
self.actions = ActionChains(webdriver)
Expand Down

0 comments on commit 33a884b

Please sign in to comment.