Skip to content

Commit

Permalink
Fixes #456 Add Image/Video search support for Bing (#448)
Browse files Browse the repository at this point in the history
* Addresses #320 and #321 Add Image/Video search support for Bing

* Add staticmethod decorator

* Add staticmethod decorator

* Add staticmethod decorator

* Add staticmethod decorator

* Added tests

* Added tests

* Added tests

* Added tests

* Added tests
  • Loading branch information
bhaveshAn authored and mariobehling committed Jan 26, 2018
1 parent 423896a commit bd30392
Show file tree
Hide file tree
Showing 6 changed files with 101 additions and 2 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ install:
- pip install -r requirements.txt

before_script:
- flake8 . --count --max-complexity=15 --show-source --statistics
- flake8 . --count --max-complexity=16 --show-source --statistics

script:
- python -m app.server > /dev/null &
Expand Down
4 changes: 4 additions & 0 deletions app/scrapers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ def feed_gen(query, engine, count=10, qtype=''):
engine = old_names.get(engine, engine)
if engine in ('quora', 'youtube'):
urls = scrapers[engine].search_without_count(query)
elif engine in ('bing',) and qtype == 'vid':
urls = scrapers[engine].video_search_without_count(query)
elif engine in ('bing',) and qtype == 'isch':
urls = scrapers[engine].image_search_without_count(query)
else:
urls = scrapers[engine].search(query, count, qtype)
return urls
42 changes: 42 additions & 0 deletions app/scrapers/bing.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,13 @@ class Bing(Scraper):
def __init__(self):
    """Configure the Bing scraper: name, endpoints and request keys."""
    Scraper.__init__(self)
    self.name = 'bing'

    # Endpoints for the regular, video and image searches.
    self.url = 'http://www.bing.com/search'
    self.videoURL = 'https://www.bing.com/videos/search'
    self.imageURL = 'https://www.bing.com/images/search'

    # NOTE(review): presumably the paging parameter name and its first
    # value; their use is not visible from here -- confirm in Scraper.
    self.startKey = 'first'
    self.defaultStart = 1

    # Query-string parameter names used by the video/image requests.
    self.videoKey = 'FORM'
    self.imageKey = 'FORM'

@staticmethod
def parse_response(soup):
Expand All @@ -31,3 +35,41 @@ def parse_response(soup):
print('Bing parsed: ' + str(urls))

return urls

@staticmethod
def parse_video_response(soup):
    """ Parse a Bing video search page and return the results found

    Every ``<a class="mc_vtvc_link">`` anchor yields one result. The
    title is the anchor's ``aria-label`` text up to the ' Duration'
    marker; the link is the anchor's ``href`` prefixed with the Bing
    host.

    Anchors without an ``href`` are skipped, and a missing
    ``aria-label`` gives an empty title instead of raising.

    Returns: urls (list)
            [{'title': title1, 'link': url1}, ...]
    """
    urls = []
    for a in soup.findAll('a', attrs={'class': 'mc_vtvc_link'}):
        href = a.get('href')
        if not href:
            # No target to link to, so the entry is useless to callers.
            continue
        # ``get`` returns None when the attribute is absent.
        label = a.get('aria-label') or ''
        urls.append({
            'title': label.split(' Duration')[0],
            'link': 'https://www.bing.com' + href
        })

    print('Bing parsed: ' + str(urls))

    return urls

@staticmethod
def parse_image_response(soup):
    """ Parse a Bing image search page and return the results found

    Every ``<a class="iusc">`` anchor yields one result whose link is
    the anchor's ``href`` prefixed with the Bing host. Anchors without
    an ``href`` are skipped instead of raising.

    Returns: urls (list)
            [{'link': url1}, {'link': url2}, ...]
    """
    urls = []
    for a in soup.findAll('a', attrs={'class': 'iusc'}):
        href = a.get('href')
        if not href:
            # ``get`` returns None when the attribute is absent.
            continue
        urls.append({
            'link': 'https://www.bing.com' + href
        })

    print('Bing parsed: ' + str(urls))

    return urls
28 changes: 28 additions & 0 deletions app/scrapers/generalized.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,31 @@ def search_without_count(self, query):
soup = BeautifulSoup(response.text, 'html.parser')
urls = self.parse_response(soup)
return urls

def video_search_without_count(self, query):
    """
    Run a video search for the query and return the parsed results.

    Only the 'bing' scraper is handled; for any other scraper name an
    empty list is returned without issuing a request.

    Returns: list
    """
    if self.name not in ['bing']:
        # No video endpoint is configured for this scraper.
        return []

    payload = {self.queryKey: query, self.videoKey: 'HDRSC3'}
    response = requests.get(
        self.videoURL, headers=self.headers, params=payload)
    soup = BeautifulSoup(response.text, 'html.parser')
    return self.parse_video_response(soup)

def image_search_without_count(self, query):
    """
    Run an image search for the query and return the parsed results.

    Only the 'bing' scraper is handled; for any other scraper name an
    empty list is returned without issuing a request.

    Returns: list
    """
    if self.name not in ['bing']:
        # No image endpoint is configured for this scraper.
        return []

    payload = {self.queryKey: query, self.imageKey: 'HDRSC2'}
    response = requests.get(
        self.imageURL, headers=self.headers, params=payload)
    soup = BeautifulSoup(response.text, 'html.parser')
    return self.parse_image_response(soup)
3 changes: 2 additions & 1 deletion app/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ def search(search_engine):
unicode # unicode is undefined in Python 3 so NameError is raised
for line in result:
line['link'] = line['link'].encode('utf-8')
line['title'] = line['title'].encode('utf-8')
if 'title' in line:
line['title'] = line['title'].encode('utf-8')
if 'desc' in line:
line['desc'] = line['desc'].encode('utf-8')
except NameError:
Expand Down
24 changes: 24 additions & 0 deletions test/test_bing.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,27 @@ def test_parse_response():
'desc': u'mock_desc'
}]
assert resp == expected_resp


def test_parse_image_response():
    """An ``iusc`` anchor is turned into a single link-only result."""
    markup = """<a class="iusc" href="mock_url">mock_title</a>"""
    parsed = Bing().parse_image_response(
        BeautifulSoup(markup, 'html.parser'))
    # The parser prefixes the href with the Bing host verbatim.
    assert parsed == [{'link': 'https://www.bing.com' + 'mock_url'}]


def test_parse_video_response():
    """A ``mc_vtvc_link`` anchor is turned into one title/link result."""
    markup = """<a aria-label="mock_title Duration" class="mc_vtvc_link"
href="mock_url"></a>"""
    parsed = Bing().parse_video_response(
        BeautifulSoup(markup, 'html.parser'))
    # The title is the aria-label text before ' Duration'.
    expected_link = 'https://www.bing.com' + 'mock_url'
    assert parsed == [{'title': u'mock_title', 'link': expected_link}]

0 comments on commit bd30392

Please sign in to comment.