From 733372411809e5a3245115f7d981db846386a047 Mon Sep 17 00:00:00 2001
From: Cullen Watson <cullen@cullenwatson.com>
Date: Tue, 16 Jul 2024 16:38:44 -0500
Subject: [PATCH] refactor: use parse classes

---
 staffspy/linkedin/__init__.py       | 414 +++++-----------------------
 staffspy/linkedin/certifications.py |  79 ++++++
 staffspy/linkedin/employee.py       |  76 +++++
 staffspy/linkedin/experiences.py    | 116 ++++++++
 staffspy/linkedin/schools.py        |  58 ++++
 staffspy/linkedin/skills.py         |  56 ++++
 6 files changed, 449 insertions(+), 350 deletions(-)
 create mode 100644 staffspy/linkedin/certifications.py
 create mode 100644 staffspy/linkedin/employee.py
 create mode 100644 staffspy/linkedin/experiences.py
 create mode 100644 staffspy/linkedin/schools.py
 create mode 100644 staffspy/linkedin/skills.py

diff --git a/staffspy/linkedin/__init__.py b/staffspy/linkedin/__init__.py
index 1795fd6..183a9de 100644
--- a/staffspy/linkedin/__init__.py
+++ b/staffspy/linkedin/__init__.py
@@ -1,32 +1,52 @@
-import json
+"""
+staffspy.linkedin
+~~~~~~~~~~~~~~~~~~~
+
+This module contains routines to scrape LinkedIn.
+"""
+
 import re
 import sys
+import json
 from urllib.parse import quote
 from concurrent.futures import ThreadPoolExecutor, as_completed
 
 import staffspy.utils as utils
-from staffspy.utils import logger
+from staffspy.linkedin.certifications import CertificationFetcher
+from staffspy.linkedin.employee import EmployeeFetcher
+from staffspy.linkedin.experiences import ExperiencesFetcher
+from staffspy.linkedin.schools import SchoolsFetcher
+from staffspy.linkedin.skills import SkillsFetcher
 from staffspy.exceptions import TooManyRequests, BadCookies, GeoUrnNotFound
-from staffspy.models import Staff, Experience, Certification, Skill, School
+from staffspy.models import Staff
+from staffspy.utils import logger
 
 
 class LinkedInScraper:
-    company_id_ep = "https://www.linkedin.com/voyager/api/organization/companies?q=universalName&universalName="
     employees_ep = "https://www.linkedin.com/voyager/api/graphql?variables=(start:{offset},query:(flagshipSearchIntent:SEARCH_SRP,{search}queryParameters:List((key:currentCompany,value:List({company_id})),{location}(key:resultType,value:List(PEOPLE))),includeFiltersInResponse:false),count:{count})&queryId=voyagerSearchDashClusters.66adc6056cf4138949ca5dcb31bb1749"
-    employee_ep = "https://www.linkedin.com/voyager/api/voyagerIdentityDashProfiles?count=1&decorationId=com.linkedin.voyager.dash.deco.identity.profile.TopCardComplete-138&memberIdentity={employee_id}&q=memberIdentity"
-    skills_ep = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerIdentityDashProfileComponents.277ba7d7b9afffb04683953cede751fb&queryName=ProfileComponentsBySectionType&variables=(tabIndex:0,sectionType:skills,profileUrn:urn%3Ali%3Afsd_profile%3A{employee_id},count:50)"
-    experience_ep = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerIdentityDashProfileComponents.277ba7d7b9afffb04683953cede751fb&queryName=ProfileComponentsBySectionType&variables=(tabIndex:0,sectionType:experience,profileUrn:urn%3Ali%3Afsd_profile%3A{employee_id},count:50)"
-    certifications_ep = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerIdentityDashProfileComponents.277ba7d7b9afffb04683953cede751fb&queryName=ProfileComponentsBySectionType&variables=(tabIndex:0,sectionType:certifications,profileUrn:urn%3Ali%3Afsd_profile%3A{employee_id},count:50)"
-    schools_ep = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerIdentityDashProfileComponents.277ba7d7b9afffb04683953cede751fb&queryName=ProfileComponentsBySectionType&variables=(tabIndex:0,sectionType:education,profileUrn:urn%3Ali%3Afsd_profile%3A{employee_id},count:50)"
-    urn_ep = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerSearchDashReusableTypeahead.57a4fa1dd92d3266ed968fdbab2d7bf5&queryName=SearchReusableTypeaheadByType&variables=(query:(showFullLastNameForConnections:false,typeaheadFilterQuery:(geoSearchTypes:List(MARKET_AREA,COUNTRY_REGION,ADMIN_DIVISION_1,CITY))),keywords:{location},type:GEO,start:0)"
+    company_id_ep = "https://www.linkedin.com/voyager/api/organization/companies?q=universalName&universalName="
+    location_id_ep = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerSearchDashReusableTypeahead.57a4fa1dd92d3266ed968fdbab2d7bf5&queryName=SearchReusableTypeaheadByType&variables=(query:(showFullLastNameForConnections:false,typeaheadFilterQuery:(geoSearchTypes:List(MARKET_AREA,COUNTRY_REGION,ADMIN_DIVISION_1,CITY))),keywords:{location},type:GEO,start:0)"
 
     def __init__(self, session_file):
         self.session = utils.load_session(session_file)
-        self.company_id = self.staff_count = self.num_staff = self.company_name = (
-            self.domain
-        ) = self.max_results = self.search_term = self.location = None
+        (
+            self.company_id,
+            self.staff_count,
+            self.num_staff,
+            self.company_name,
+            self.domain,
+            self.max_results,
+            self.search_term,
+            self.location,
+        ) = (None, None, None, None, None, None, None, None)
+        self.certs = CertificationFetcher(self.session)
+        self.skills = SkillsFetcher(self.session)
+        self.employees = EmployeeFetcher(self.session)
+        self.schools = SchoolsFetcher(self.session)
+        self.experiences = ExperiencesFetcher(self.session)
 
     def get_company_id(self, company_name):
+        """Get the company id and staff count from the company name."""
         res = self.session.get(f"{self.company_id_ep}{company_name}")
         if res.status_code != 200:
             raise Exception(
@@ -48,6 +68,7 @@ def get_company_id(self, company_name):
         return company_id, staff_count
 
     def parse_staff(self, elements):
+        """Parse the staff from the search results"""
         staff = []
 
         for elem in elements:
@@ -79,227 +100,8 @@ def parse_staff(self, elements):
                 )
         return staff
 
-    def parse_emp(self, emp, emp_dict):
-        try:
-            photo_data = emp_dict["profilePicture"]["displayImageReference"][
-                "vectorImage"
-            ]
-            photo_base_url = photo_data["rootUrl"]
-            photo_ext_url = photo_data["artifacts"][-1]["fileIdentifyingUrlPathSegment"]
-            profile_photo = f"{photo_base_url}{photo_ext_url}"
-        except:
-            profile_photo = None
-
-        emp.profile_id = emp_dict["publicIdentifier"]
-
-        emp.profile_link = f'https://www.linkedin.com/in/{emp_dict["publicIdentifier"]}'
-
-        emp.profile_photo = profile_photo
-        emp.first_name = emp_dict["firstName"]
-        emp.last_name = emp_dict["lastName"]
-        emp.potential_email = utils.create_email(
-            emp.first_name, emp.last_name, self.domain
-        )
-
-        emp.followers = emp_dict.get("followingState", {}).get("followerCount")
-        emp.connections = emp_dict["connections"]["paging"]["total"]
-        emp.location = emp_dict["geoLocation"]["geo"]["defaultLocalizedName"]
-        emp.company = emp_dict["profileTopPosition"]["elements"][0]["companyName"]
-        edu_cards = emp_dict["profileTopEducation"]["elements"]
-        if edu_cards:
-            emp.school = edu_cards[0].get(
-                "schoolName", edu_cards[0].get("school", {}).get("name")
-            )
-        emp.influencer = emp_dict["influencer"]
-        emp.creator = emp_dict["creator"]
-        emp.premium = emp_dict["premium"]
-
-    def fetch_employee(self, base_staff):
-        ep = self.employee_ep.format(employee_id=base_staff.id)
-        res = self.session.get(ep)
-        logger.debug(f"basic info, status code - {res.status_code}")
-        if res.status_code == 429:
-            return TooManyRequests("429 Too Many Requests")
-        if not res.ok:
-            logger.debug(res.text[:200])
-            return False
-        try:
-            res_json = res.json()
-        except json.decoder.JSONDecodeError:
-            logger.debug(res.text[:200])
-            return False
-
-        try:
-            employee_json = res_json["elements"][0]
-        except (KeyError, IndexError, TypeError):
-            logger.debug(res_json)
-            return False
-
-        self.parse_emp(base_staff, employee_json)
-        return True
-
-    def fetch_skills(self, staff):
-        ep = self.skills_ep.format(employee_id=staff.id)
-        res = self.session.get(ep)
-        logger.debug(f"skills, status code - {res.status_code}")
-        if res.status_code == 429:
-            return TooManyRequests("429 Too Many Requests")
-        if not res.ok:
-            logger.debug(res.text[:200])
-            return False
-        try:
-            res_json = res.json()
-        except json.decoder.JSONDecodeError:
-            logger.debug(res.text[:200])
-            return False
-
-        tab_comp = res_json["data"]["identityDashProfileComponentsBySectionType"][
-            "elements"
-        ][0]["components"]["tabComponent"]
-        if tab_comp:
-            sections = tab_comp["sections"]
-            staff.skills = self.parse_skills(sections)
-        return True
-
-    def fetch_experiences(self, staff):
-        ep = self.experience_ep.format(employee_id=staff.id)
-        res = self.session.get(ep)
-        logger.debug(f"exps, status code - {res.status_code}")
-        if res.status_code == 429:
-            return TooManyRequests("429 Too Many Requests")
-        if not res.ok:
-            logger.debug(res.text[:200])
-            return False
-        try:
-            res_json = res.json()
-        except json.decoder.JSONDecodeError:
-            logger.debug(res.text[:200])
-            return False
-
-        try:
-            skills_json = res_json["data"][
-                "identityDashProfileComponentsBySectionType"
-            ]["elements"][0]["components"]["pagedListComponent"]["components"][
-                "elements"
-            ]
-        except (KeyError, IndexError, TypeError) as e:
-            logger.debug(res_json)
-            return False
-
-        staff.experiences = self.parse_experiences(skills_json)
-        return True
-
-    def fetch_certifications(self, staff):
-        ep = self.certifications_ep.format(employee_id=staff.id)
-        res = self.session.get(ep)
-        logger.debug(f"certs, status code - {res.status_code}")
-        if res.status_code == 429:
-            return TooManyRequests("429 Too Many Requests")
-        if not res.ok:
-            logger.debug(res.text[:200])
-            return False
-        try:
-            res_json = res.json()
-        except json.decoder.JSONDecodeError:
-            logger.debug(res.text[:200])
-            return False
-
-        try:
-            elems = res_json["data"]["identityDashProfileComponentsBySectionType"][
-                "elements"
-            ]
-        except (KeyError, IndexError, TypeError) as e:
-            logger.debug(res_json)
-            return False
-        if elems:
-            cert_elems = elems[0]["components"]["pagedListComponent"]["components"][
-                "elements"
-            ]
-            staff.certifications = self.parse_certifications(cert_elems)
-        return True
-
-    def fetch_schools(self, staff):
-        ep = self.schools_ep.format(employee_id=staff.id)
-        res = self.session.get(ep)
-        logger.debug(f"schools, status code - {res.status_code}")
-        if res.status_code == 429:
-            return TooManyRequests("429 Too Many Requests")
-
-        if not res.ok:
-            logger.debug(res.text[:200])
-            return False
-        try:
-            res_json = res.json()
-        except json.decoder.JSONDecodeError:
-            logger.debug(res.text[:200])
-            return False
-
-        try:
-            elements = res_json["data"]["identityDashProfileComponentsBySectionType"][
-                "elements"
-            ][0]["components"]["pagedListComponent"]["components"]["elements"]
-        except (KeyError, IndexError, TypeError) as e:
-            logger.debug(res_json)
-            return False
-
-        staff.schools = self.parse_schools(elements)
-        return True
-
-    def parse_schools(self, elements):
-        schools = []
-        for elem in elements:
-            entity = elem["components"]["entityComponent"]
-            if not entity:
-                break
-            years = entity["caption"]["text"] if entity["caption"] else None
-            school_name = entity["titleV2"]["text"]["text"]
-            degree = entity["subtitle"]["text"] if entity["subtitle"] else None
-            school = School(
-                years=years,
-                school=school_name,
-                degree=degree,
-            )
-            schools.append(school)
-
-        return schools
-
-    def parse_certifications(self, sections):
-        certs = []
-        for section in sections:
-            elem = section["components"]["entityComponent"]
-            if not elem:
-                break
-            title = elem["titleV2"]["text"]["text"]
-            issuer = elem["subtitle"]["text"] if elem["subtitle"] else None
-            date_issued = (
-                elem["caption"]["text"].replace("Issued ", "")
-                if elem["caption"]
-                else None
-            )
-            cert_id = (
-                elem["metadata"]["text"].replace("Credential ID ", "")
-                if elem["metadata"]
-                else None
-            )
-            try:
-                subcomp = elem["subComponents"]["components"][0]
-                cert_link = subcomp["components"]["actionComponent"]["action"][
-                    "navigationAction"
-                ]["actionTarget"]
-            except:
-                cert_link = None
-            cert = Certification(
-                title=title,
-                issuer=issuer,
-                date_issued=date_issued,
-                cert_link=cert_link,
-                cert_id=cert_id,
-            )
-            certs.append(cert)
-
-        return certs
-
     def fetch_staff(self, offset, company_id):
+        """Fetch the staff at the company using LinkedIn search"""
         ep = self.employees_ep.format(
             offset=offset,
             company_id=company_id,
@@ -334,8 +136,9 @@ def fetch_staff(self, offset, company_id):
         )
         return new_staff
 
-    def fetch_urn(self, location: str):
-        ep = self.urn_ep.format(location=quote(location))
+    def fetch_location_id(self, location: str):
+        """Fetch the location id for the location to be used in LinkedIn search"""
+        ep = self.location_id_ep.format(location=quote(location))
         res = self.session.get(ep)
         try:
             res_json = res.json()
@@ -377,7 +180,7 @@ def scrape_staff(
 
         if location:
             try:
-                self.fetch_urn(location)
+                self.fetch_location_id(location)
             except GeoUrnNotFound as e:
                 logger.error(str(e))
                 return staff_list[:max_results]
@@ -401,128 +204,39 @@ def scrape_staff(
             filter(lambda x: x.name != "LinkedIn Member", reduced_staff_list)
         )
 
-        def fetch_all_info_for_employee(employee: Staff, index: int):
-            logger.info(
-                f"Fetching employee data for {employee.id} {index} / {self.num_staff}"
-            )
-
-            with ThreadPoolExecutor(max_workers=5) as executor:
-                tasks = {}
-                tasks[executor.submit(self.fetch_employee, employee)] = "employee"
-                tasks[executor.submit(self.fetch_skills, employee)] = "skills"
-                tasks[executor.submit(self.fetch_experiences, employee)] = "experiences"
-                tasks[executor.submit(self.fetch_certifications, employee)] = (
-                    "certifications"
-                )
-                tasks[executor.submit(self.fetch_schools, employee)] = "schools"
-
-                for future in as_completed(tasks):
-                    result = future.result()
-                    if isinstance(result, TooManyRequests):
-                        logger.debug(f"API rate limit exceeded for {tasks[future]}")
-                        raise TooManyRequests(
-                            f"Stopping due to API rate limit exceeded for {tasks[future]}"
-                        )
-
         if extra_profile_data:
             try:
                 for i, employee in enumerate(non_restricted, start=1):
-                    fetch_all_info_for_employee(employee, i)
+                    self.fetch_all_info_for_employee(employee, i)
             except (BadCookies, TooManyRequests) as e:
                 logger.error(str(e))
 
         return reduced_staff_list
 
-    def parse_multi_exp(self, entity):
-        exps = []
-        company = entity["titleV2"]["text"]["text"]
-        elements = entity["subComponents"]["components"][0]["components"][
-            "pagedListComponent"
-        ]["components"]["elements"]
-        for elem in elements:
-            entity = elem["components"]["entityComponent"]
-            duration = entity["caption"]["text"]
-            title = entity["titleV2"]["text"]["text"]
-            emp_type = (
-                entity["subtitle"]["text"].lower() if entity["subtitle"] else None
+    def fetch_all_info_for_employee(self, employee: Staff, index: int):
+        """Simultaneously fetch all the data for an employee"""
+        logger.info(
+            f"Fetching employee data for {employee.id} {index} / {self.num_staff}"
+        )
+
+        with ThreadPoolExecutor(max_workers=5) as executor:
+            tasks = {}
+            tasks[
+                executor.submit(self.employees.fetch_employee, employee, self.domain)
+            ] = "employee"
+            tasks[executor.submit(self.skills.fetch_skills, employee)] = "skills"
+            tasks[executor.submit(self.experiences.fetch_experiences, employee)] = (
+                "experiences"
             )
-            location = entity["metadata"]["text"] if entity["metadata"] else None
-            from_date, to_date = utils.parse_duration(duration)
-            if from_date:
-                duration = duration.split(" · ")[1]
-            exp = Experience(
-                duration=duration,
-                title=title,
-                company=company,
-                emp_type=emp_type,
-                from_date=from_date,
-                to_date=to_date,
-                location=location,
+            tasks[executor.submit(self.certs.fetch_certifications, employee)] = (
+                "certifications"
             )
-            exps.append(exp)
-        return exps
-
-    def parse_experiences(self, elements):
-        exps = []
-        for elem in elements:
-            entity = elem["components"]["entityComponent"]
-            try:
-                if (
-                    not entity["subComponents"]
-                    or not entity["subComponents"]["components"][0]["components"][
-                        "pagedListComponent"
-                    ]
-                ):
-                    emp_type = None
-                    duration = entity["caption"]["text"]
-                    from_date, to_date = utils.parse_duration(duration)
-                    if from_date:
-                        duration = duration.split(" · ")[1]
-                    company = entity["subtitle"]["text"] if entity["subtitle"] else None
-                    title = entity["titleV2"]["text"]["text"]
-                    location = (
-                        entity["metadata"]["text"] if entity["metadata"] else None
-                    )
-                    parts = company.split(" · ")
-                    if len(parts) > 1:
-                        company = parts[0]
-                        emp_type = parts[-1].lower()
-                    exp = Experience(
-                        duration=duration,
-                        title=title,
-                        company=company,
-                        emp_type=emp_type,
-                        from_date=from_date,
-                        to_date=to_date,
-                        location=location,
-                    )
-                    exps.append(exp)
-
-                else:
-                    multi_exps = self.parse_multi_exp(entity)
-                    exps += multi_exps
-
-            except Exception as e:
-                logger.exception(e)
-
-        return exps
-
-    def parse_skills(self, sections):
-        skills = []
-        for section in sections:
-            elems = section["subComponent"]["components"]["pagedListComponent"][
-                "components"
-            ]["elements"]
-            for elem in elems:
-                entity = elem["components"]["entityComponent"]
-                skill = entity["titleV2"]["text"]["text"]
-                try:
-                    endorsements = int(
-                        entity["subComponents"]["components"][0]["components"][
-                            "insightComponent"
-                        ]["text"]["text"]["text"].replace(" endorsements", "")
+            tasks[executor.submit(self.schools.fetch_schools, employee)] = "schools"
+
+            for future in as_completed(tasks):
+                result = future.result()
+                if isinstance(result, TooManyRequests):
+                    logger.debug(f"API rate limit exceeded for {tasks[future]}")
+                    raise TooManyRequests(
+                        f"Stopping due to API rate limit exceeded for {tasks[future]}"
                     )
-                except:
-                    endorsements = None
-                skills.append(Skill(name=skill, endorsements=endorsements))
-        return skills
diff --git a/staffspy/linkedin/certifications.py b/staffspy/linkedin/certifications.py
new file mode 100644
index 0000000..70a9150
--- /dev/null
+++ b/staffspy/linkedin/certifications.py
@@ -0,0 +1,93 @@
+import json
+import logging
+
+from staffspy.exceptions import TooManyRequests
+from staffspy.models import Certification
+
+logger = logging.getLogger(__name__)
+
+
+class CertificationFetcher:
+    """Fetch and parse the certifications section of a LinkedIn profile."""
+
+    def __init__(self, session):
+        self.session = session
+        self.endpoint = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerIdentityDashProfileComponents.277ba7d7b9afffb04683953cede751fb&queryName=ProfileComponentsBySectionType&variables=(tabIndex:0,sectionType:certifications,profileUrn:urn%3Ali%3Afsd_profile%3A{employee_id},count:50)"
+
+    def fetch_certifications(self, staff):
+        """Fetch the certifications of ``staff`` and store them on it.
+
+        Returns True when the response was processed, False when the request
+        failed or the payload could not be read, and a TooManyRequests
+        instance (returned, not raised) on HTTP 429.
+        """
+        ep = self.endpoint.format(employee_id=staff.id)
+        res = self.session.get(ep)
+        logger.debug(f"certs, status code - {res.status_code}")
+        if res.status_code == 429:
+            # Returned rather than raised, matching the other fetchers.
+            return TooManyRequests("429 Too Many Requests")
+        if not res.ok:
+            logger.debug(res.text[:200])
+            return False
+        try:
+            res_json = res.json()
+        except json.decoder.JSONDecodeError:
+            logger.debug(res.text[:200])
+            return False
+
+        try:
+            elems = res_json["data"]["identityDashProfileComponentsBySectionType"][
+                "elements"
+            ]
+        except (KeyError, IndexError, TypeError):
+            logger.debug(res_json)
+            return False
+
+        if elems:
+            cert_elems = elems[0]["components"]["pagedListComponent"]["components"][
+                "elements"
+            ]
+            staff.certifications = self.parse_certifications(cert_elems)
+        return True
+
+    def parse_certifications(self, sections):
+        """Build a list of Certification objects from the section elements.
+
+        Parsing stops at the first element without an ``entityComponent``.
+        """
+        certs = []
+        for section in sections:
+            elem = section["components"]["entityComponent"]
+            if not elem:
+                break
+            title = elem["titleV2"]["text"]["text"]
+            issuer = elem["subtitle"]["text"] if elem["subtitle"] else None
+            date_issued = (
+                elem["caption"]["text"].replace("Issued ", "")
+                if elem["caption"]
+                else None
+            )
+            cert_id = (
+                elem["metadata"]["text"].replace("Credential ID ", "")
+                if elem["metadata"]
+                else None
+            )
+            try:
+                subcomp = elem["subComponents"]["components"][0]
+                cert_link = subcomp["components"]["actionComponent"]["action"][
+                    "navigationAction"
+                ]["actionTarget"]
+            except (KeyError, IndexError, TypeError):
+                # No link is attached to this certification.
+                cert_link = None
+            cert = Certification(
+                title=title,
+                issuer=issuer,
+                date_issued=date_issued,
+                cert_link=cert_link,
+                cert_id=cert_id,
+            )
+            certs.append(cert)
+
+        return certs
diff --git a/staffspy/linkedin/employee.py b/staffspy/linkedin/employee.py
new file mode 100644
index 0000000..20fd0d1
--- /dev/null
+++ b/staffspy/linkedin/employee.py
@@ -0,0 +1,89 @@
+import json
+import logging
+
+import staffspy.utils as utils
+from staffspy.exceptions import TooManyRequests
+
+logger = logging.getLogger(__name__)
+
+
+class EmployeeFetcher:
+    """Fetch and parse the top-card profile data of a LinkedIn member."""
+
+    def __init__(self, session):
+        self.session = session
+        self.endpoint = "https://www.linkedin.com/voyager/api/voyagerIdentityDashProfiles?count=1&decorationId=com.linkedin.voyager.dash.deco.identity.profile.TopCardComplete-138&memberIdentity={employee_id}&q=memberIdentity"
+
+        # Email domain for create_email(); set on each fetch_employee() call.
+        self.domain = None
+
+    def fetch_employee(self, base_staff, domain):
+        """Fetch the profile of ``base_staff`` and fill in its fields.
+
+        ``domain`` is stored on the fetcher and passed to
+        ``utils.create_email`` when the profile is parsed.
+
+        Returns True when the profile was parsed, False when the request
+        failed or the payload could not be read, and a TooManyRequests
+        instance (returned, not raised) on HTTP 429.
+        """
+        self.domain = domain
+        ep = self.endpoint.format(employee_id=base_staff.id)
+        res = self.session.get(ep)
+        logger.debug(f"basic info, status code - {res.status_code}")
+        if res.status_code == 429:
+            return TooManyRequests("429 Too Many Requests")
+        if not res.ok:
+            logger.debug(res.text[:200])
+            return False
+        try:
+            res_json = res.json()
+        except json.decoder.JSONDecodeError:
+            logger.debug(res.text[:200])
+            return False
+
+        try:
+            employee_json = res_json["elements"][0]
+        except (KeyError, IndexError, TypeError):
+            logger.debug(res_json)
+            return False
+
+        self.parse_emp(base_staff, employee_json)
+        return True
+
+    def parse_emp(self, emp, emp_dict):
+        """Parse the employee data from the employee profile."""
+        try:
+            photo_data = emp_dict["profilePicture"]["displayImageReference"][
+                "vectorImage"
+            ]
+            photo_base_url = photo_data["rootUrl"]
+            photo_ext_url = photo_data["artifacts"][-1]["fileIdentifyingUrlPathSegment"]
+            profile_photo = f"{photo_base_url}{photo_ext_url}"
+        except (KeyError, IndexError, TypeError):
+            # Profile has no usable picture data.
+            profile_photo = None
+
+        emp.profile_id = emp_dict["publicIdentifier"]
+
+        emp.profile_link = f'https://www.linkedin.com/in/{emp_dict["publicIdentifier"]}'
+
+        emp.profile_photo = profile_photo
+        emp.first_name = emp_dict["firstName"]
+        emp.last_name = emp_dict["lastName"]
+        emp.potential_email = utils.create_email(
+            emp.first_name, emp.last_name, self.domain
+        )
+
+        emp.followers = emp_dict.get("followingState", {}).get("followerCount")
+        emp.connections = emp_dict["connections"]["paging"]["total"]
+        emp.location = emp_dict["geoLocation"]["geo"]["defaultLocalizedName"]
+        emp.company = emp_dict["profileTopPosition"]["elements"][0]["companyName"]
+        edu_cards = emp_dict["profileTopEducation"]["elements"]
+        if edu_cards:
+            emp.school = edu_cards[0].get(
+                "schoolName", edu_cards[0].get("school", {}).get("name")
+            )
+        emp.influencer = emp_dict["influencer"]
+        emp.creator = emp_dict["creator"]
+        emp.premium = emp_dict["premium"]
diff --git a/staffspy/linkedin/experiences.py b/staffspy/linkedin/experiences.py
new file mode 100644
index 0000000..5c8ca29
--- /dev/null
+++ b/staffspy/linkedin/experiences.py
@@ -0,0 +1,131 @@
+import json
+import logging
+
+import staffspy.utils as utils
+from staffspy.exceptions import TooManyRequests
+from staffspy.models import Experience
+
+logger = logging.getLogger(__name__)
+
+
+class ExperiencesFetcher:
+    """Fetch and parse the experience section of a LinkedIn profile."""
+
+    def __init__(self, session):
+        self.session = session
+        self.endpoint = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerIdentityDashProfileComponents.277ba7d7b9afffb04683953cede751fb&queryName=ProfileComponentsBySectionType&variables=(tabIndex:0,sectionType:experience,profileUrn:urn%3Ali%3Afsd_profile%3A{employee_id},count:50)"
+
+    def fetch_experiences(self, staff):
+        """Fetch the experiences of ``staff`` and store them on it.
+
+        Returns True when the response was processed, False when the request
+        failed or the payload could not be read, and a TooManyRequests
+        instance (returned, not raised) on HTTP 429.
+        """
+        ep = self.endpoint.format(employee_id=staff.id)
+        res = self.session.get(ep)
+        logger.debug(f"exps, status code - {res.status_code}")
+        if res.status_code == 429:
+            return TooManyRequests("429 Too Many Requests")
+        if not res.ok:
+            logger.debug(res.text[:200])
+            return False
+        try:
+            res_json = res.json()
+        except json.decoder.JSONDecodeError:
+            logger.debug(res.text[:200])
+            return False
+
+        try:
+            exp_elems = res_json["data"][
+                "identityDashProfileComponentsBySectionType"
+            ]["elements"][0]["components"]["pagedListComponent"]["components"][
+                "elements"
+            ]
+        except (KeyError, IndexError, TypeError):
+            logger.debug(res_json)
+            return False
+
+        staff.experiences = self.parse_experiences(exp_elems)
+        return True
+
+    def parse_experiences(self, elements):
+        """Build a list of Experience objects from the section elements.
+
+        Entries with nested positions are expanded via ``parse_multi_exp``;
+        an entry that fails to parse is logged and skipped.
+        """
+        exps = []
+        for elem in elements:
+            entity = elem["components"]["entityComponent"]
+            try:
+                if (
+                    not entity["subComponents"]
+                    or not entity["subComponents"]["components"][0]["components"][
+                        "pagedListComponent"
+                    ]
+                ):
+                    emp_type = None
+                    duration = entity["caption"]["text"]
+                    from_date, to_date = utils.parse_duration(duration)
+                    if from_date:
+                        duration = duration.split(" · ")[1]
+                    company = entity["subtitle"]["text"] if entity["subtitle"] else None
+                    title = entity["titleV2"]["text"]["text"]
+                    location = (
+                        entity["metadata"]["text"] if entity["metadata"] else None
+                    )
+                    # A missing subtitle leaves company as None; skip the split then.
+                    parts = company.split(" · ") if company else []
+                    if len(parts) > 1:
+                        company = parts[0]
+                        emp_type = parts[-1].lower()
+                    exp = Experience(
+                        duration=duration,
+                        title=title,
+                        company=company,
+                        emp_type=emp_type,
+                        from_date=from_date,
+                        to_date=to_date,
+                        location=location,
+                    )
+                    exps.append(exp)
+
+                else:
+                    multi_exps = self.parse_multi_exp(entity)
+                    exps += multi_exps
+
+            except Exception as e:
+                logger.exception(e)
+
+        return exps
+
+    def parse_multi_exp(self, entity):
+        """Parse an entry that groups several positions at one company."""
+        exps = []
+        company = entity["titleV2"]["text"]["text"]
+        elements = entity["subComponents"]["components"][0]["components"][
+            "pagedListComponent"
+        ]["components"]["elements"]
+        for elem in elements:
+            entity = elem["components"]["entityComponent"]
+            duration = entity["caption"]["text"]
+            title = entity["titleV2"]["text"]["text"]
+            emp_type = (
+                entity["subtitle"]["text"].lower() if entity["subtitle"] else None
+            )
+            location = entity["metadata"]["text"] if entity["metadata"] else None
+            from_date, to_date = utils.parse_duration(duration)
+            if from_date:
+                duration = duration.split(" · ")[1]
+            exp = Experience(
+                duration=duration,
+                title=title,
+                company=company,
+                emp_type=emp_type,
+                from_date=from_date,
+                to_date=to_date,
+                location=location,
+            )
+            exps.append(exp)
+        return exps
diff --git a/staffspy/linkedin/schools.py b/staffspy/linkedin/schools.py
new file mode 100644
index 0000000..bad0432
--- /dev/null
+++ b/staffspy/linkedin/schools.py
@@ -0,0 +1,58 @@
+import json
+import logging
+
+from exceptions import TooManyRequests
+from models import School
+
+logger = logging.getLogger(__name__)  # module-level logger, named after this module
+
+
+class SchoolsFetcher:
+    """Fetches a profile's education section and stores it on the staff model."""
+
+    def __init__(self, session):
+        self.session = session
+        self.endpoint = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerIdentityDashProfileComponents.277ba7d7b9afffb04683953cede751fb&queryName=ProfileComponentsBySectionType&variables=(tabIndex:0,sectionType:education,profileUrn:urn%3Ali%3Afsd_profile%3A{employee_id},count:50)"
+
+    def fetch_schools(self, staff):
+        """Request the education section for ``staff.id`` and set ``staff.schools``.
+
+        Returns True on success, False on a non-OK response, undecodable JSON or
+        an unexpected payload layout, and a TooManyRequests instance on HTTP 429.
+        """
+        res = self.session.get(self.endpoint.format(employee_id=staff.id))
+        logger.debug(f"schools, status code - {res.status_code}")
+        if res.status_code == 429:
+            # NOTE(review): the exception is returned, not raised - confirm intent.
+            return TooManyRequests("429 Too Many Requests")
+        if not res.ok:
+            logger.debug(res.text[:200])
+            return False
+        try:
+            payload = res.json()
+        except json.decoder.JSONDecodeError:
+            logger.debug(res.text[:200])
+            return False
+        try:
+            section = payload["data"]["identityDashProfileComponentsBySectionType"]
+            paged = section["elements"][0]["components"]["pagedListComponent"]
+            elements = paged["components"]["elements"]
+        except (KeyError, IndexError, TypeError):
+            logger.debug(payload)
+            return False
+
+        staff.schools = self.parse_schools(elements)
+        return True
+
+    def parse_schools(self, elements):
+        """Map elements to School models, stopping at the first falsy entity."""
+        parsed = []
+        for element in elements:
+            card = element["components"]["entityComponent"]
+            if not card:
+                break
+            caption = card["caption"]["text"] if card["caption"] else None
+            name = card["titleV2"]["text"]["text"]
+            subtitle = card["subtitle"]["text"] if card["subtitle"] else None
+            parsed.append(School(years=caption, school=name, degree=subtitle))
+        return parsed
diff --git a/staffspy/linkedin/skills.py b/staffspy/linkedin/skills.py
new file mode 100644
index 0000000..f425fca
--- /dev/null
+++ b/staffspy/linkedin/skills.py
@@ -0,0 +1,56 @@
+import json
+import logging
+
+from exceptions import TooManyRequests
+from models import Skill
+
+logger = logging.getLogger(__name__)  # module-level logger, named after this module
+
+
+class SkillsFetcher:
+    """Fetches a profile's skills section and stores it on the staff model."""
+
+    def __init__(self, session):
+        self.session = session
+        self.endpoint = "https://www.linkedin.com/voyager/api/graphql?queryId=voyagerIdentityDashProfileComponents.277ba7d7b9afffb04683953cede751fb&queryName=ProfileComponentsBySectionType&variables=(tabIndex:0,sectionType:skills,profileUrn:urn%3Ali%3Afsd_profile%3A{employee_id},count:50)"
+
+    def fetch_skills(self, staff):
+        """Request the skills section for ``staff.id`` and set ``staff.skills``.
+
+        Returns True, or False for a non-OK response, bad JSON or an unexpected
+        layout; HTTP 429 returns (not raises) a TooManyRequests - NOTE(review).
+        """
+        res = self.session.get(self.endpoint.format(employee_id=staff.id))
+        logger.debug(f"skills, status code - {res.status_code}")
+        if res.status_code == 429:
+            return TooManyRequests("429 Too Many Requests")
+        if not res.ok:
+            logger.debug(res.text[:200])
+            return False
+        try:
+            section = res.json()["data"]["identityDashProfileComponentsBySectionType"]
+            tab_comp = section["elements"][0]["components"]["tabComponent"]
+        except (json.decoder.JSONDecodeError, KeyError, IndexError, TypeError):
+            logger.debug(res.text[:200])
+            return False
+        if tab_comp:
+            staff.skills = self.parse_skills(tab_comp["sections"])
+        return True
+
+    def parse_skills(self, sections):
+        """Return a Skill per entity; endorsements is None if no count parses."""
+        skills = []
+        for section in sections:
+            paged = section["subComponent"]["components"]["pagedListComponent"]
+            for elem in paged["components"]["elements"]:
+                entity = elem["components"]["entityComponent"]
+                name = entity["titleV2"]["text"]["text"]
+                try:
+                    insight = entity["subComponents"]["components"][0]["components"]
+                    raw = insight["insightComponent"]["text"]["text"]["text"]
+                    raw = raw.replace(" endorsements", "")
+                    endorsements = int(raw.replace(" endorsement", ""))  # singular too
+                except (KeyError, IndexError, TypeError, ValueError, AttributeError):
+                    endorsements = None
+                skills.append(Skill(name=name, endorsements=endorsements))
+        return skills