banner photo

cullenwatson · Aug 7, 2024 · e68a5d2 · e68a5d2
1 parent 4252853
commit e68a5d2
Show file tree

Hide file tree

Showing 5 changed files with 34 additions and 21 deletions.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "staffspy"
-version = "0.2.9"
+version = "0.2.10"
 description = "Staff scraper library for LinkedIn"
 authors = ["Cullen Watson <[email protected]>"]
 readme = "README.md"

diff --git a/staffspy/__init__.py b/staffspy/__init__.py
@@ -76,7 +76,7 @@ def scrape_staff(
         linkedin_member_df = staff_df[staff_df["name"] == "LinkedIn Member"]
         non_linkedin_member_df = staff_df[staff_df["name"] != "LinkedIn Member"]
         staff_df = pd.concat([non_linkedin_member_df, linkedin_member_df])
-        logger.info(f"Scraped {len(staff_df)} staff members from {company_name}")
+        logger.info(f"Scraped {len(staff_df)} staff members from {company_name}, with {len(linkedin_member_df)} hidden LinkedIn users")
         return staff_df
 
     def scrape_users(

diff --git a/staffspy/linkedin/employee.py b/staffspy/linkedin/employee.py
@@ -43,16 +43,18 @@ def fetch_employee(self, base_staff, domain):
 
     def parse_emp(self, emp: Staff, emp_dict: dict):
         """Parse the employee data from the employee profile."""
-        try:
-            photo_data = emp_dict["profilePicture"]["displayImageReference"][
-                "vectorImage"
-            ]
-            photo_base_url = photo_data["rootUrl"]
-            photo_ext_url = photo_data["artifacts"][-1]["fileIdentifyingUrlPathSegment"]
-            profile_photo = f"{photo_base_url}{photo_ext_url}"
-        except (KeyError, TypeError, IndexError, ValueError) as e:
-            profile_photo = None
 
+        def get_photo_url(emp_dict: dict, key: str):
+            """Return the URL built from the last image artifact under ``key``, or None."""
+            try:
+                vector_image = emp_dict[key]["displayImageReference"]["vectorImage"]
+                last_artifact = vector_image["artifacts"][-1]
+                return f"{vector_image['rootUrl']}{last_artifact['fileIdentifyingUrlPathSegment']}"
+            except (KeyError, TypeError, IndexError, ValueError):
+                return None
+
+        emp.profile_photo = get_photo_url(emp_dict, "profilePicture")
+        emp.banner_photo = get_photo_url(emp_dict, "backgroundPicture")
         emp.profile_id = emp_dict["publicIdentifier"]
         try:
             emp.headline = emp_dict.get('headline')
@@ -62,10 +64,10 @@ def parse_emp(self, emp: Staff, emp_dict: dict):
             pass
         emp.is_connection = next(iter(emp_dict['memberRelationship']['memberRelationshipUnion'])) == 'connection'
         emp.open_to_work = emp_dict['profilePicture'].get('frameType')=='OPEN_TO_WORK'
+        emp.is_hiring = emp_dict['profilePicture'].get('frameType')=='HIRING'
 
         emp.profile_link = f'https://www.linkedin.com/in/{emp_dict["publicIdentifier"]}'
 
-        emp.profile_photo = profile_photo
         emp.first_name = emp_dict["firstName"]
         emp.last_name = emp_dict["lastName"].split(',')[0]
         emp.potential_emails = utils.create_emails(

diff --git a/staffspy/linkedin/skills.py b/staffspy/linkedin/skills.py
@@ -43,18 +43,26 @@ def parse_skills(self, sections):
                 "components"
             ]["elements"]
             for elem in elems:
+                passed_assessment, endorsements = None, 0
                 entity = elem["components"]["entityComponent"]
                 name = entity["titleV2"]["text"]["text"]
                 if name in names:
                     continue
                 names.add(name)
-                try:
-                    endorsements = int(
-                        entity["subComponents"]["components"][0]["components"][
-                            "insightComponent"
-                        ]["text"]["text"]["text"].replace(" endorsements", "")
-                    )
-                except:
-                    endorsements = 0
-                skills.append(Skill(name=name, endorsements=endorsements))
+                # A skill may carry no sub-components at all; the removed code
+                # tolerated that (endorsements fell back to 0), so keep doing so.
+                components = (entity.get("subComponents") or {}).get("components") or []
+                for component in components:
+                    # Best-effort: sub-components without insight text are ignored.
+                    try:
+                        candidate = component["components"]["insightComponent"]["text"]["text"]["text"]
+                        if " endorsement" in candidate:
+                            # Also accepts a singular "1 endorsement" label, not only "N endorsements".
+                            endorsements = int(candidate.split(" endorsement", 1)[0])
+                        if "Passed LinkedIn Skill Assessment" in candidate:
+                            passed_assessment = True
+                    except (KeyError, TypeError, IndexError, ValueError, AttributeError):
+                        pass
+
+                skills.append(Skill(name=name, endorsements=endorsements, passed_assessment=passed_assessment))
         return skills
diff --git a/staffspy/utils/models.py b/staffspy/utils/models.py
@@ -23,11 +23,13 @@ def to_dict(self):
 class Skill(BaseModel):
     name: str | None = None
     endorsements: int | None = None
+    passed_assessment: bool | None = None  # parse_skills sets True when an insight mentions "Passed LinkedIn Skill Assessment"; otherwise left None
 
     def to_dict(self):
         return {
             "name": self.name,
             "endorsements": self.endorsements if self.endorsements else 0,
+            "passed_assessment": self.passed_assessment  # passed through unchanged (may be None); no default like the 0 used for endorsements
         }
 
 
@@ -94,7 +96,9 @@ class Staff(BaseModel):
     creator: bool | None = None
     premium: bool | None = None
     open_to_work: bool | None = None
+    is_hiring: bool | None = None
     profile_photo: str | None = None
+    banner_photo: str | None = None
     skills: list[Skill] | None = None
     experiences: list[Experience] | None = None
     certifications: list[Certification] | None = None
@@ -156,6 +160,7 @@ def to_dict(self):
             "creator": self.creator,
             "influencer": self.influencer,
             "open_to_work": self.open_to_work,
+            "is_hiring": self.is_hiring,
             "current_position":self.current_position,
             "current_company": top_three_companies[0],
             "past_company_1": top_three_companies[1],
@@ -186,6 +191,7 @@ def to_dict(self):
             "potential_emails": ', '.join(self.potential_emails) if self.potential_emails else None,
             "profile_link": self.profile_link,
             "profile_photo": self.profile_photo,
+            "banner_photo": self.banner_photo,
         }
 
     def estimate_age_based_on_education(self):