From 97fb7226d96764ed553b531f6ea28479cade3acb Mon Sep 17 00:00:00 2001
From: Cullen Watson <cullen@cullenwatson.com>
Date: Wed, 7 Aug 2024 17:27:48 -0500
Subject: [PATCH] banner photo (#39)

---
 pyproject.toml                |  2 +-
 staffspy/__init__.py          |  2 +-
 staffspy/linkedin/employee.py | 22 ++++++++++++----------
 staffspy/linkedin/skills.py   | 23 ++++++++++++++---------
 staffspy/utils/models.py      |  6 ++++++
 5 files changed, 34 insertions(+), 21 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 82ba55b..53b2fbf 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "staffspy"
-version = "0.2.9"
+version = "0.2.10"
 description = "Staff scraper library for LinkedIn"
 authors = ["Cullen Watson <cullen@bunsly.com>"]
 readme = "README.md"
diff --git a/staffspy/__init__.py b/staffspy/__init__.py
index 06f31f0..3fabaa8 100644
--- a/staffspy/__init__.py
+++ b/staffspy/__init__.py
@@ -76,7 +76,7 @@ def scrape_staff(
         linkedin_member_df = staff_df[staff_df["name"] == "LinkedIn Member"]
         non_linkedin_member_df = staff_df[staff_df["name"] != "LinkedIn Member"]
         staff_df = pd.concat([non_linkedin_member_df, linkedin_member_df])
-        logger.info(f"Scraped {len(staff_df)} staff members from {company_name}")
+        logger.info(f"Scraped {len(staff_df)} staff members from {company_name}, with {len(linkedin_member_df)} hidden LinkedIn users")
         return staff_df
 
     def scrape_users(
diff --git a/staffspy/linkedin/employee.py b/staffspy/linkedin/employee.py
index 405b7ad..364fcf2 100644
--- a/staffspy/linkedin/employee.py
+++ b/staffspy/linkedin/employee.py
@@ -43,16 +43,18 @@ def fetch_employee(self, base_staff, domain):
 
     def parse_emp(self, emp: Staff, emp_dict: dict):
         """Parse the employee data from the employee profile."""
-        try:
-            photo_data = emp_dict["profilePicture"]["displayImageReference"][
-                "vectorImage"
-            ]
-            photo_base_url = photo_data["rootUrl"]
-            photo_ext_url = photo_data["artifacts"][-1]["fileIdentifyingUrlPathSegment"]
-            profile_photo = f"{photo_base_url}{photo_ext_url}"
-        except (KeyError, TypeError, IndexError, ValueError) as e:
-            profile_photo = None
 
+        def get_photo_url(emp_dict: dict, key: str):
+            """Build the image URL stored under `key`; return None if any part is missing."""
+            try:
+                vector_image = emp_dict[key]["displayImageReference"]["vectorImage"]
+                root, last_artifact = vector_image["rootUrl"], vector_image["artifacts"][-1]
+                return f'{root}{last_artifact["fileIdentifyingUrlPathSegment"]}'
+            except (KeyError, TypeError, IndexError, ValueError):
+                return None
+
+        emp.profile_photo = get_photo_url(emp_dict, "profilePicture")
+        emp.banner_photo = get_photo_url(emp_dict, "backgroundPicture")
         emp.profile_id = emp_dict["publicIdentifier"]
         try:
             emp.headline = emp_dict.get('headline')
@@ -62,10 +64,10 @@ def parse_emp(self, emp: Staff, emp_dict: dict):
             pass
         emp.is_connection = next(iter(emp_dict['memberRelationship']['memberRelationshipUnion'])) == 'connection'
         emp.open_to_work = emp_dict['profilePicture'].get('frameType')=='OPEN_TO_WORK'
+        emp.is_hiring = emp_dict['profilePicture'].get('frameType')=='HIRING'
 
         emp.profile_link = f'https://www.linkedin.com/in/{emp_dict["publicIdentifier"]}'
 
-        emp.profile_photo = profile_photo
         emp.first_name = emp_dict["firstName"]
         emp.last_name = emp_dict["lastName"].split(',')[0]
         emp.potential_emails = utils.create_emails(
diff --git a/staffspy/linkedin/skills.py b/staffspy/linkedin/skills.py
index 38a530a..0daaf54 100644
--- a/staffspy/linkedin/skills.py
+++ b/staffspy/linkedin/skills.py
@@ -43,18 +43,23 @@ def parse_skills(self, sections):
                 "components"
             ]["elements"]
             for elem in elems:
+                passed_assessment,endorsements = None,0
                 entity = elem["components"]["entityComponent"]
                 name = entity["titleV2"]["text"]["text"]
                 if name in names:
                     continue
                 names.add(name)
-                try:
-                    endorsements = int(
-                        entity["subComponents"]["components"][0]["components"][
-                            "insightComponent"
-                        ]["text"]["text"]["text"].replace(" endorsements", "")
-                    )
-                except:
-                    endorsements = 0
-                skills.append(Skill(name=name, endorsements=endorsements))
+                # A skill may carry no insight sub-components; treat that as "no data".
+                components = (entity.get("subComponents") or {}).get("components") or []
+                for component in components:
+                    try:
+                        candidate = component["components"]["insightComponent"]["text"]["text"]["text"]
+                        if " endorsements" in candidate:
+                            endorsements = int(candidate.replace(" endorsements", ""))
+                        if "Passed LinkedIn Skill Assessment" in candidate:
+                            passed_assessment = True
+                    except (KeyError, TypeError, IndexError, ValueError):
+                        # Best-effort parse: skip components without a usable insight text.
+                        continue
+                skills.append(Skill(name=name, endorsements=endorsements, passed_assessment=passed_assessment))
         return skills
diff --git a/staffspy/utils/models.py b/staffspy/utils/models.py
index e9d006c..4d14722 100644
--- a/staffspy/utils/models.py
+++ b/staffspy/utils/models.py
@@ -23,11 +23,13 @@ def to_dict(self):
 class Skill(BaseModel):
     name: str | None = None
     endorsements: int | None = None
+    passed_assessment: bool | None = None  # True when parse_skills sees "Passed LinkedIn Skill Assessment"; else None
 
     def to_dict(self):
         return {
             "name": self.name,
             "endorsements": self.endorsements if self.endorsements else 0,
+            "passed_assessment": self.passed_assessment
         }
 
 
@@ -94,7 +96,9 @@ class Staff(BaseModel):
     creator: bool | None = None
     premium: bool | None = None
     open_to_work: bool | None = None
+    is_hiring: bool | None = None
     profile_photo: str | None = None
+    banner_photo: str | None = None
     skills: list[Skill] | None = None
     experiences: list[Experience] | None = None
     certifications: list[Certification] | None = None
@@ -156,6 +160,7 @@ def to_dict(self):
             "creator": self.creator,
             "influencer": self.influencer,
             "open_to_work": self.open_to_work,
+            "is_hiring": self.is_hiring,
             "current_position":self.current_position,
             "current_company": top_three_companies[0],
             "past_company_1": top_three_companies[1],
@@ -186,6 +191,7 @@ def to_dict(self):
             "potential_emails": ', '.join(self.potential_emails) if self.potential_emails else None,
             "profile_link": self.profile_link,
             "profile_photo": self.profile_photo,
+            "banner_photo": self.banner_photo,
         }
 
     def estimate_age_based_on_education(self):