From df2d1016c44f044377f702f5423afc9a68d5f07d Mon Sep 17 00:00:00 2001
From: BarisYazici <barisyazici@sabanciuniv.edu>
Date: Tue, 14 Mar 2023 00:40:23 +0100
Subject: [PATCH] feat set get request header cookie

---
 config.yaml.dist                    |  7 +++++++
 flathunter/crawl_immobilienscout.py | 22 +++++++++++++++++++---
 2 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/config.yaml.dist b/config.yaml.dist
index 0cf29ccb..32a89360 100644
--- a/config.yaml.dist
+++ b/config.yaml.dist
@@ -191,3 +191,10 @@ apprise:
 # For websites like idealista.it, there are anti-crawler measures that can be
 # circumvented using proxies.
 # use_proxy_list: True
+
+# If immobilienscout24 flags the crawler as a bot, you can send the cookie of a
+# logged-in browser session with the crawler's requests. To obtain it, log in at
+# immobilienscout24.de, open the developer tools (F12), go to the "Network" tab,
+# then "Cookies", and copy the value of the "reese84" cookie into the setting
+# below.
+# immoscout_cookie: ""
diff --git a/flathunter/crawl_immobilienscout.py b/flathunter/crawl_immobilienscout.py
index 1c98ef2f..711525a9 100644
--- a/flathunter/crawl_immobilienscout.py
+++ b/flathunter/crawl_immobilienscout.py
@@ -31,7 +31,7 @@ def __init__(self, config):
         self.driver = None
         self.checkbox = None
         self.afterlogin_string = None
-
+        self.set_cookie_enabled = "immoscout_cookie" in self.config
         if config.captcha_enabled():
             self.checkbox = config.get_captcha_checkbox()
             self.afterlogin_string = config.get_captcha_afterlogin_string()
@@ -147,18 +147,34 @@ def extract_entry_from_javascript(self, entry):
             'rooms': str(entry.get("numberOfRooms", ''))
         }
 
+    def set_cookie(self):
+        """Sets the Cookie request header to the configured reese84 name=value pair"""
+        self.HEADERS['Cookie'] = f'reese84={self.config["immoscout_cookie"]}'
+
+    # pylint: disable=too-many-arguments
+    def get_soup_from_url(self, url, driver=None, checkbox=None,
+                          afterlogin_string=None, set_cookie=False):
+        """Optionally sets the Cookie header, then delegates to the parent implementation"""
+        if set_cookie:
+            # Install the header before handing the URL to the parent class.
+            self.set_cookie()
+        return super().get_soup_from_url(
+            url, driver, checkbox, afterlogin_string
+        )
+
     def get_page(self, search_url, driver=None, page_no=None):
         """Applies a page number to a formatted search URL and fetches the exposes at that page"""
         return self.get_soup_from_url(
             search_url.format(page_no),
             driver=driver,
             checkbox=self.checkbox,
-            afterlogin_string=self.afterlogin_string
+            afterlogin_string=self.afterlogin_string,
+            set_cookie=self.set_cookie_enabled
         )
 
     def get_expose_details(self, expose):
         """Loads additional details for an expose by processing the expose detail URL"""
-        soup = self.get_soup_from_url(expose['url'])
+        soup = self.get_soup_from_url(expose['url'], set_cookie=self.set_cookie_enabled)
         date = soup.find('dd', {"class": "is24qa-bezugsfrei-ab"})
         expose['from'] = datetime.datetime.now().strftime("%2d.%2m.%Y")
         if date is not None: