From 70a7e967b711a89326e7fb46cb7339a13d3bfba5 Mon Sep 17 00:00:00 2001
From: Bhavesh Anand <bhaveshanand7@gmail.com>
Date: Mon, 29 Jan 2018 21:23:59 +0530
Subject: [PATCH] Fixes #455 Add Image/Video search support for Parsijoo (#449)

* Addresses #320 and #321 Add Image/Video search support for Parsijoo

* Add staticmethod decorator

* Add staticmethod decorator

* Add staticmethod decorator

* Add staticmethod decorator

* Add staticmethod decorator

* Add staticmethod decorator

* Add staticmethod decorator

* Add staticmethod decorator

* Add staticmethod decorator

* Add staticmethod decorator

* Add tests
---
 .travis.yml                 |  2 +-
 app/scrapers/__init__.py    |  4 ++--
 app/scrapers/generalized.py | 15 ++++++++++---
 app/scrapers/parsijoo.py    | 42 +++++++++++++++++++++++++++++++++++++
 test/test_parsijoo.py       | 25 ++++++++++++++++++++++
 5 files changed, 82 insertions(+), 6 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 45250049..3772ca0c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -12,7 +12,7 @@ install:
   - pip install -r requirements.txt
 
 before_script:
-  - flake8 . --count --max-complexity=16 --show-source --statistics
+  - flake8 . --count --max-complexity=16 --show-source --statistics --max-line-length=100
 
 script:
   - python -m app.server > /dev/null &
diff --git a/app/scrapers/__init__.py b/app/scrapers/__init__.py
index 18e09569..190bf86a 100644
--- a/app/scrapers/__init__.py
+++ b/app/scrapers/__init__.py
@@ -42,9 +42,9 @@ def feed_gen(query, engine, count=10, qtype=''):
     engine = old_names.get(engine, engine)
     if engine in ('quora', 'youtube'):
         urls = scrapers[engine].search_without_count(query)
-    elif engine in ('bing',) and qtype == 'vid':
+    elif engine in ('bing', 'parsijoo') and qtype == 'vid':
         urls = scrapers[engine].video_search_without_count(query)
-    elif engine in ('bing',) and qtype == 'isch':
+    elif engine in ('bing', 'parsijoo') and qtype == 'isch':
         urls = scrapers[engine].image_search_without_count(query)
     elif engine in ('ask',) and qtype == 'vid':
         urls = scrapers[engine].video_search(query, count, qtype)
diff --git a/app/scrapers/generalized.py b/app/scrapers/generalized.py
index a51a1546..204533a0 100644
--- a/app/scrapers/generalized.py
+++ b/app/scrapers/generalized.py
@@ -133,13 +133,19 @@ def video_search_without_count(self, query):
             Returns: list
         """
         urls = []
-        if self.name in ['bing']:
+        if self.name in ['parsijoo']:
+            url = self.videoURL
+            payload = {self.queryKey: query}
+        elif self.name in ['bing']:
             url = self.videoURL
             payload = {self.queryKey: query, self.videoKey: 'HDRSC3'}
         response = requests.get(url, headers=self.headers, params=payload)
         soup = BeautifulSoup(response.text, 'html.parser')
         urls = self.parse_video_response(soup)
-        return urls
+        if len(urls) == 0:
+            return "No video with this Keyword"
+        else:
+            return urls
 
     def image_search_without_count(self, query):
         """
@@ -147,7 +153,10 @@ def image_search_without_count(self, query):
             Returns: list
         """
         urls = []
-        if self.name in ['bing']:
+        if self.name in ['parsijoo']:
+            url = self.imageURL
+            payload = {self.queryKey: query}
+        elif self.name in ['bing']:
             url = self.imageURL
             payload = {self.queryKey: query, self.imageKey: 'HDRSC2'}
         response = requests.get(url, headers=self.headers, params=payload)
diff --git a/app/scrapers/parsijoo.py b/app/scrapers/parsijoo.py
index 54d916b3..69e5e40a 100644
--- a/app/scrapers/parsijoo.py
+++ b/app/scrapers/parsijoo.py
@@ -8,6 +8,8 @@ class Parsijoo(Scraper):
     def __init__(self):
         Scraper.__init__(self)
         self.url = 'https://parsijoo.ir/web'
+        self.imageURL = 'https://image.parsijoo.ir/image'
+        self.videoURL = 'https://video.parsijoo.ir/video'
         self.defaultStart = 0
         self.startKey = 'co'
         self.name = 'parsijoo'
@@ -29,3 +31,43 @@ def parse_response(soup):
         print('Parsijoo parsed: ' + str(urls))
 
         return urls
+
+    @staticmethod
+    def parse_video_response(soup):
+        """ Parse response and returns the urls
+
+            Returns: urls (list of dict)
+                    [{'title': title1, 'link': url1}, ...]
+        """
+        urls = []
+        for a in soup.findAll('a', attrs={'class': 'over-page'}):
+            title = a.get('title')
+            url = 'https://video.parsijoo.ir' + a.get('href')
+            urls.append({
+                'title': title,
+                'link': url
+            })
+
+        print('Parsijoo parsed: ' + str(urls))
+
+        return urls
+
+    @staticmethod
+    def parse_image_response(soup):
+        """ Parse response and returns the urls
+
+            Returns: urls (list of dict)
+                    [{'link': url1}, {'link': url2}, ...]
+        """
+        urls = []
+        for div in \
+                soup.findAll('div', attrs={'class': 'image-container overflow'}):
+            a = div.find('a')
+            url = 'https://image.parsijoo.ir' + a.get('href')
+            urls.append({
+                'link': url
+            })
+
+        print('Parsijoo parsed: ' + str(urls))
+
+        return urls
diff --git a/test/test_parsijoo.py b/test/test_parsijoo.py
index e89b5387..8682aedf 100644
--- a/test/test_parsijoo.py
+++ b/test/test_parsijoo.py
@@ -20,3 +20,28 @@ def test_parse_response():
     }]
     resp = Parsijoo().parse_response(dummy_soup)
     assert resp == expected_resp
+
+
+def test_parse_video_response():
+    html_text = """<a href="mock_url" class="over-page"
+    title="mock_title">mock_title</a>"""
+    dummy_soup = BeautifulSoup(html_text, 'html.parser')
+    url = 'https://video.parsijoo.ir' + "mock_url"
+    expected_resp = [{
+        'title': u'mock_title',
+        'link': url,
+    }]
+    resp = Parsijoo().parse_video_response(dummy_soup)
+    assert resp == expected_resp
+
+
+def test_parse_image_response():
+    html_text = """<div class="image-container overflow"><a href="mock_url"
+    title="mock_title">mock_title</a></div>"""
+    dummy_soup = BeautifulSoup(html_text, 'html.parser')
+    image_url = 'https://image.parsijoo.ir' + 'mock_url'
+    expected_resp = [{
+        'link': image_url,
+    }]
+    resp = Parsijoo().parse_image_response(dummy_soup)
+    assert resp == expected_resp