From e7880e98ca46133fbeec79994c0c8f807cf29ba6 Mon Sep 17 00:00:00 2001 From: Subramanyam Challa Date: Mon, 7 Aug 2023 21:30:37 +0530 Subject: [PATCH 1/8] added get similarity score --- scripts/similarity/get_similarity_score.py | 104 +++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 scripts/similarity/get_similarity_score.py diff --git a/scripts/similarity/get_similarity_score.py b/scripts/similarity/get_similarity_score.py new file mode 100644 index 00000000..09883314 --- /dev/null +++ b/scripts/similarity/get_similarity_score.py @@ -0,0 +1,104 @@ +import json +import os + +import cohere +import yaml +from qdrant_client import QdrantClient, models +from qdrant_client.http.models import Batch + +cwd = os.path.join("/home", "subramanyam24", "projects", "Resume-Matcher") +READ_RESUME_FROM = os.path.join(cwd, 'Data', 'Processed', 'Resumes') +READ_JOB_DESCRIPTION_FROM = os.path.join(cwd, 'Data', 'Processed', 'JobDescription') +config_path = os.path.join(cwd, "scripts", "similarity") + + +def read_config(filepath): + with open(filepath) as f: + config = yaml.safe_load(f) + return config + + +def read_doc(path): + with open(path) as f: + try: + data = json.load(f) + except Exception as e: + print(f'Error reading JSON file: {e}') + data = {} + return data + + +class QdrantSearch: + def __init__(self, resumes, jd): + config = read_config(config_path + "/config.yml") + self.cohere_key = config['cohere']['api_key'] + self.qdrant_key = config['qdrant']['api_key'] + self.qdrant_url = config['qdrant']['url'] + self.resumes = resumes + self.jd = jd + + self.cohere = cohere.Client(self.cohere_key) + + self.qdrant = QdrantClient( + url=self.qdrant_url, + api_key=self.qdrant_key, + ) + + vector_size = 4096 + self.qdrant.recreate_collection( + collection_name="collection_resume_matcher", + vectors_config=models.VectorParams( + size=vector_size, + distance=models.Distance.COSINE + ) + ) + + def get_embedding(self, text): + embeddings = self.cohere.embed([text], "large").embeddings + return list(map(float, embeddings[0])), len(embeddings[0]) + + def update_qdrant(self): + vectors = [] + ids = [] + for i, resume in enumerate(self.resumes): + vector, size = self.get_embedding(resume) + vectors.append(vector) + ids.append(i) + + self.qdrant.upsert( + collection_name="collection_resume_matcher", + points=Batch( + ids=ids, + vectors=vectors, + payloads=[{"text": resume} for resume in self.resumes] + + ) + ) + + def search(self): + vector, _ = self.get_embedding(self.jd) + + hits = self.qdrant.search( + collection_name="collection_resume_matcher", + query_vector=vector, + limit=30 + ) + results = [] + for hit in hits: + result = { + 'text': str(hit.payload)[:30], + 'score': hit.score + } + results.append(result) + + return results + + +def get_similarity_score(resume_string, jd_string): + qdrant_search = QdrantSearch([resume_string], jd_string) + qdrant_search.update_qdrant() + results = qdrant_search.search() + return results + + + From 6f38fbfccef4de03aac2c1d7e35893185d018b36 Mon Sep 17 00:00:00 2001 From: Subramanyam Challa Date: Mon, 7 Aug 2023 22:31:37 +0530 Subject: [PATCH 2/8] updated similarity score function with proper path --- scripts/similarity/get_similarity_score.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/scripts/similarity/get_similarity_score.py b/scripts/similarity/get_similarity_score.py index 09883314..40ecb472 100644 --- a/scripts/similarity/get_similarity_score.py +++ b/scripts/similarity/get_similarity_score.py @@ -6,7 +6,21 
@@ from qdrant_client import QdrantClient, models from qdrant_client.http.models import Batch -cwd = os.path.join("/home", "subramanyam24", "projects", "Resume-Matcher") + +def find_path(folder_name): + curr_dir = os.getcwd() + while True: + if folder_name in os.listdir(curr_dir): + return os.path.join(curr_dir, folder_name) + else: + parent_dir = os.path.dirname(curr_dir) + if parent_dir == '/': + break + curr_dir = parent_dir + raise ValueError(f"Folder '{folder_name}' not found.") + + +cwd = find_path('Resume-Matcher') READ_RESUME_FROM = os.path.join(cwd, 'Data', 'Processed', 'Resumes') READ_JOB_DESCRIPTION_FROM = os.path.join(cwd, 'Data', 'Processed', 'JobDescription') config_path = os.path.join(cwd, "scripts", "similarity") @@ -101,4 +115,3 @@ def get_similarity_score(resume_string, jd_string): return results - From 65dd1b912704783aec6918f7d802dc80007d47c9 Mon Sep 17 00:00:00 2001 From: Subramanyam Challa Date: Mon, 7 Aug 2023 22:33:40 +0530 Subject: [PATCH 3/8] added similarity score code --- streamlit_app.py | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/streamlit_app.py b/streamlit_app.py index bda7e0c6..6396b589 100644 --- a/streamlit_app.py +++ b/streamlit_app.py @@ -1,15 +1,19 @@ -import networkx as nx +import json from typing import List -import streamlit as st + +import networkx as nx +import nltk import pandas as pd -import json import plotly.express as px import plotly.graph_objects as go -from scripts.utils.ReadFiles import get_filenames_from_dir -from streamlit_extras import add_vertical_space as avs +import streamlit as st from annotated_text import annotated_text, parameters +from streamlit_extras import add_vertical_space as avs from streamlit_extras.badges import badge -import nltk + +from scripts.similarity.get_similarity_score import get_similarity_score +from scripts.utils.ReadFiles import get_filenames_from_dir + try: nltk.data.find('tokenizers/punkt') @@ -32,7 +36,7 @@ def create_star_graph(nodes_and_weights, title): # Add nodes and edges with weights to the graph for node, weight in nodes_and_weights: G.add_node(node) - G.add_edge(central_node, node, weight=weight*100) + G.add_edge(central_node, node, weight=weight * 100) # Get position layout for nodes pos = nx.spring_layout(G) @@ -142,12 +146,12 @@ def tokenize_string(input_string): st.write("There are", len(resume_names), " resumes present. 
Please select one from the menu below:") -output = st.slider('Select Resume Number', 0, len(resume_names)-1, 2) +output = st.slider('Select Resume Number', 0, len(resume_names) - 1, 2) avs.add_vertical_space(5) st.write("You have selected ", resume_names[output], " printing the resume") -selected_file = read_json("Data/Processed/Resumes/"+resume_names[output]) +selected_file = read_json("Data/Processed/Resumes/" + resume_names[output]) avs.add_vertical_space(2) st.markdown("#### Parsed Resume Data") @@ -165,6 +169,8 @@ def tokenize_string(input_string): selected_file["clean_data"], selected_file["extracted_keywords"], "KW", "#0B666A")) +resume_string = ' '.join(selected_file["extracted_keywords"]) + avs.add_vertical_space(5) st.write("Now let's take a look at the extracted entities from the resume.") @@ -176,7 +182,7 @@ def tokenize_string(input_string): # Create the dictionary keyword_dict = {} for keyword, value in selected_file['keyterms']: - keyword_dict[keyword] = value*100 + keyword_dict[keyword] = value * 100 fig = go.Figure(data=[go.Table(header=dict(values=["Keyword", "Value"], font=dict(size=12), @@ -202,14 +208,14 @@ def tokenize_string(input_string): st.write("There are", len(job_descriptions), " resumes present. Please select one from the menu below:") output = st.slider('Select Job Description Number', - 0, len(job_descriptions)-1, 2) + 0, len(job_descriptions) - 1, 2) avs.add_vertical_space(5) st.write("You have selected ", job_descriptions[output], " printing the job description") selected_jd = read_json( - "Data/Processed/JobDescription/"+job_descriptions[output]) + "Data/Processed/JobDescription/" + job_descriptions[output]) avs.add_vertical_space(2) st.markdown("#### Job Description") @@ -224,6 +230,7 @@ def tokenize_string(input_string): annotated_text(create_annotated_text( selected_file["clean_data"], selected_jd["extracted_keywords"], "JD", "#F24C3D")) +jd_string = ' '.join(selected_jd["extracted_keywords"]) st.write("Now let's take a look at the extracted entities from the job description.") @@ -235,7 +242,7 @@ def tokenize_string(input_string): # Create the dictionary keyword_dict = {} for keyword, value in selected_jd['keyterms']: - keyword_dict[keyword] = value*100 + keyword_dict[keyword] = value * 100 fig = go.Figure(data=[go.Table(header=dict(values=["Keyword", "Value"], font=dict(size=12), @@ -256,6 +263,10 @@ def tokenize_string(input_string): avs.add_vertical_space(3) +result = get_similarity_score(resume_string, jd_string) +similarity_score=result[0]["score"] +st.write("Similarity Score obtained for the resume and job description is:", similarity_score) + st.title(':blue[Resume Matcher]') st.subheader( 'Free and Open Source ATS to help your resume pass the screening stage.') From b4077df149369e36f411f00eacbe212492e45bb3 Mon Sep 17 00:00:00 2001 From: Subramanyam Challa Date: Tue, 8 Aug 2023 09:07:10 +0530 Subject: [PATCH 4/8] resolved conflicts and added logging --- scripts/similarity/get_similarity_score.py | 101 ++++++++++++++++----- 1 file changed, 79 insertions(+), 22 deletions(-) diff --git a/scripts/similarity/get_similarity_score.py b/scripts/similarity/get_similarity_score.py index 40ecb472..feec9dfd 100644 --- a/scripts/similarity/get_similarity_score.py +++ b/scripts/similarity/get_similarity_score.py @@ -1,4 +1,5 @@ import json +import logging import os import cohere @@ -6,6 +7,28 @@ from qdrant_client import QdrantClient, models from qdrant_client.http.models import Batch +logging.basicConfig( + filename='app_similarity_score.log', + 
filemode='w', + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) + +logger = logging.getLogger(__name__) +logger.setLevel(logging.DEBUG) + +console_handler = logging.StreamHandler() +formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") +console_handler.setFormatter(formatter) +console_handler.setLevel(logging.DEBUG) + +file_handler = logging.FileHandler("app_similarity_score.log") +file_handler.setLevel(logging.DEBUG) +file_handler.setFormatter(formatter) + +logger.addHandler(file_handler) +logger.addHandler(console_handler) + def find_path(folder_name): curr_dir = os.getcwd() @@ -27,9 +50,17 @@ def find_path(folder_name): def read_config(filepath): - with open(filepath) as f: - config = yaml.safe_load(f) - return config + try: + with open(filepath) as f: + config = yaml.safe_load(f) + return config + except FileNotFoundError as e: + logger.error(f"Configuration file {filepath} not found: {e}") + except yaml.YAMLError as e: + logger.error(f"Error parsing YAML in configuration file {filepath}: {e}", exc_info=True) + except Exception as e: + logger.error(f"Error reading configuration file {filepath}: {e}") + return None def read_doc(path): @@ -37,7 +68,7 @@ def read_doc(path): try: data = json.load(f) except Exception as e: - print(f'Error reading JSON file: {e}') + logger.error(f'Error reading JSON file: {e}') data = {} return data @@ -50,7 +81,6 @@ def __init__(self, resumes, jd): self.qdrant_url = config['qdrant']['url'] self.resumes = resumes self.jd = jd - self.cohere = cohere.Client(self.cohere_key) self.qdrant = QdrantClient( @@ -67,9 +97,17 @@ def __init__(self, resumes, jd): ) ) + self.logger = logging.getLogger(self.__class__.__name__) + + self.logger.addHandler(console_handler) + self.logger.addHandler(file_handler) + def get_embedding(self, text): - embeddings = self.cohere.embed([text], "large").embeddings - return list(map(float, embeddings[0])), len(embeddings[0]) + try: + embeddings = self.cohere.embed([text], "large").embeddings + return list(map(float, embeddings[0])), len(embeddings[0]) + except Exception as e: + self.logger.error(f"Error getting embeddings: {e}", exc_info=True) def update_qdrant(self): vectors = [] @@ -78,16 +116,18 @@ def update_qdrant(self): vector, size = self.get_embedding(resume) vectors.append(vector) ids.append(i) - - self.qdrant.upsert( - collection_name="collection_resume_matcher", - points=Batch( - ids=ids, - vectors=vectors, - payloads=[{"text": resume} for resume in self.resumes] - + try: + self.qdrant.upsert( + collection_name="collection_resume_matcher", + points=Batch( + ids=ids, + vectors=vectors, + payloads=[{"text": resume} for resume in self.resumes] + + ) ) - ) + except Exception as e: + self.logger.error(f"Error upserting the vectors to the qdrant collection: {e}", exc_info=True) def search(self): vector, _ = self.get_embedding(self.jd) @@ -108,10 +148,27 @@ def search(self): return results -def get_similarity_score(resume_string, jd_string): - qdrant_search = QdrantSearch([resume_string], jd_string) +def get_similarity_score(resume_string, job_description_string): + logger.info("Started getting similarity score") + qdrant_search = QdrantSearch([resume_string], job_description_string) qdrant_search.update_qdrant() - results = qdrant_search.search() - return results - - + search_result = qdrant_search.search() + logger.info("Finished getting similarity score") + return search_result + + +if __name__ == "__main__": + # To give your custom resume use this code + 
resume_dict = read_config( + READ_RESUME_FROM + "/Resume-bruce_wayne_fullstack.pdf4783d115-e6fc-462e-ae4d-479152884b28.json") + job_dict = read_config( + READ_JOB_DESCRIPTION_FROM + "/JobDescription-job_desc_full_stack_engineer_pdf4de00846-a4fe-4fe5-a4d7" + "-2a8a1b9ad020.json") + resume_keywords = resume_dict["extracted_keywords"] + job_description_keywords = job_dict["extracted_keywords"] + + resume_string = ' '.join(resume_keywords) + jd_string = ' '.join(job_description_keywords) + final_result = get_similarity_score(resume_string, jd_string) + for r in final_result: + print(r) From 8cb2e6d4637695e0046a4495a4aadba0c65d9928 Mon Sep 17 00:00:00 2001 From: Subramanyam Challa Date: Tue, 8 Aug 2023 14:27:25 +0530 Subject: [PATCH 5/8] updated requirements with qdrant client --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 303d638d..53b192f9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -108,4 +108,5 @@ wasabi==1.1.2 watchdog==3.0.0 zipp==3.16.2 -cohere~=4.19.2 \ No newline at end of file +cohere~=4.19.2 +qdrant-client \ No newline at end of file From d5a8a9b24204b81cf173b3fdec85a2f8255d9baa Mon Sep 17 00:00:00 2001 From: Subramanyam Challa Date: Tue, 8 Aug 2023 17:42:14 +0530 Subject: [PATCH 6/8] added functionality to continue if there is no config.yml file --- streamlit_app.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/streamlit_app.py b/streamlit_app.py index 6052b1e7..91812aa0 100644 --- a/streamlit_app.py +++ b/streamlit_app.py @@ -1,4 +1,5 @@ import json +import os from typing import List import networkx as nx @@ -11,9 +12,11 @@ from streamlit_extras import add_vertical_space as avs from streamlit_extras.badges import badge -from scripts.similarity.get_similarity_score import get_similarity_score +from scripts.similarity.get_similarity_score import get_similarity_score, find_path, read_config from scripts.utils.ReadFiles import get_filenames_from_dir +cwd = find_path('Resume-Matcher') +config_path = os.path.join(cwd, "scripts", "similarity") try: nltk.data.find('tokenizers/punkt') @@ -148,8 +151,8 @@ def tokenize_string(input_string): if len(resume_names) > 1: st.write("There are", len(resume_names), - " resumes present. Please select one from the menu below:") - output = st.slider('Select Resume Number', 0, len(resume_names)-1, 0) + " resumes present. Please select one from the menu below:") + output = st.slider('Select Resume Number', 0, len(resume_names) - 1, 0) else: st.write("There is 1 resume present") @@ -174,8 +177,6 @@ def tokenize_string(input_string): selected_file["clean_data"], selected_file["extracted_keywords"], "KW", "#0B666A")) -resume_string = ' '.join(selected_file["extracted_keywords"]) - avs.add_vertical_space(5) st.write("Now let's take a look at the extracted entities from the resume.") @@ -213,9 +214,9 @@ def tokenize_string(input_string): output = 0 if len(job_descriptions) > 1: st.write("There are", len(job_descriptions), - " resumes present. Please select one from the menu below:") + " resumes present. 
Please select one from the menu below:") output = st.slider('Select Job Description Number', - 0, len(job_descriptions)-1, 0) + 0, len(job_descriptions) - 1, 0) else: st.write("There is 1 job description present") @@ -239,7 +240,6 @@ def tokenize_string(input_string): annotated_text(create_annotated_text( selected_file["clean_data"], selected_jd["extracted_keywords"], "JD", "#F24C3D")) -jd_string = ' '.join(selected_jd["extracted_keywords"]) st.write("Now let's take a look at the extracted entities from the job description.") @@ -272,9 +272,19 @@ def tokenize_string(input_string): avs.add_vertical_space(3) -result = get_similarity_score(resume_string, jd_string) -similarity_score=result[0]["score"] -st.write("Similarity Score obtained for the resume and job description is:", similarity_score) +config_file_path = config_path + "/config.yml" +if os.path.exists(config_file_path): + config_data = read_config(config_file_path) + if config_data: + print("Config file parsed successfully:") + resume_string = ' '.join(selected_file["extracted_keywords"]) + jd_string = ' '.join(selected_jd["extracted_keywords"]) + result = get_similarity_score(resume_string, jd_string) + similarity_score = result[0]["score"] + st.write("Similarity Score obtained for the resume and job description is:", similarity_score) +else: + print("Config file does not exist.") + st.title(':blue[Resume Matcher]') st.subheader( From fe0861d13a3c8f44164a386fbf9998bb1f24ae1c Mon Sep 17 00:00:00 2001 From: Subramanyam Challa Date: Tue, 8 Aug 2023 17:49:00 +0530 Subject: [PATCH 7/8] updated code for resume_matcher notebook . --- archive/resume_matcher.ipynb | 76 +++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/archive/resume_matcher.ipynb b/archive/resume_matcher.ipynb index 39fac1a4..0c5d50bc 100644 --- a/archive/resume_matcher.ipynb +++ b/archive/resume_matcher.ipynb @@ -17,32 +17,11 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "aHoRFk4LpFSZ", - "outputId": "0a950106-ea2a-498a-9dcc-e99458b1f139" + "id": "aHoRFk4LpFSZ" }, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m44.5/44.5 kB\u001b[0m \u001b[31m1.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.7/2.7 MB\u001b[0m \u001b[31m30.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m132.5/132.5 kB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.6/2.6 MB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m75.4/75.4 kB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m304.5/304.5 kB\u001b[0m \u001b[31m12.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m74.5/74.5 kB\u001b[0m \u001b[31m6.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m57.5/57.5 kB\u001b[0m 
\u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m58.3/58.3 kB\u001b[0m \u001b[31m5.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h" - ] - } - ], + "outputs": [], "source": [ "!pip install cohere --quiet\n", "!pip install qdrant-client --quiet" @@ -55,14 +34,27 @@ "from qdrant_client import QdrantClient, models\n", "from qdrant_client.http.models import Batch\n", "import cohere\n", + "\n", "def read_config(filepath):\n", - " with open(filepath) as f:\n", - " config = yaml.safe_load(f)\n", - " return config\n", + " try:\n", + " with open(filepath) as f:\n", + " config = yaml.safe_load(f)\n", + " return config\n", + " except FileNotFoundError as e:\n", + " print(f\"Configuration file {filepath} not found: {e}\")\n", + " except yaml.YAMLError as e:\n", + " print(f\"Error parsing YAML in configuration file {filepath}: {e}\", exc_info=True)\n", + " except Exception as e:\n", + " print(f\"Error reading configuration file {filepath}: {e}\")\n", + " return None\n", + "\n", "\n", "class QdrantSearch:\n", " def __init__(self, resumes, jd):\n", " config = read_config(\"config.yml\")\n", + "\n", + "\n", + "\n", " self.cohere_key = config['cohere']['api_key']\n", " self.qdrant_key = config['qdrant']['api_key']\n", " self.qdrant_url = config['qdrant']['url']\n", @@ -128,7 +120,7 @@ "metadata": { "id": "SXOgwcCATtww" }, - "execution_count": null, + "execution_count": 6, "outputs": [] }, { @@ -136,23 +128,26 @@ "source": [ "resumes = [\"Professional Summary Highly skilled MERN Stack Developer with over 10 years of experience specializing in designing building and maintaining complex web applications Proficient in MongoDB Expressjs React and Nodejs Currently contributing to the development of AI technologies at OpenAI with a primary focus on the ChatGPT project Skills JavaScript and TypeScript MongoDB Expressjs React Nodejs MERN stack RESTful APIs Git and GitHub Docker and Kubernetes Agile and Scrum Python and Machine Learning basics Experience June 2020 PresentMERN Stack Developer OpenAI San Francisco USA Working on the development of the ChatGPT project using Nodejs Expressjs and React Implementing RESTful services for communication between frontend and backend Utilizing Docker and Kubernetes for deployment and management of applications Working in an Agile environment delivering highquality software every sprint Contributing to the design and implementation of machine learning algorithms for natural language processing tasks July 2015 May 2020Full Stack Developer Uber San Francisco USA Developed and maintained scalable web applications using MERN stack Ensured the performance quality and responsiveness of applications Successfully deployed solutions using Docker and Kubernetes Collaborated with a team of engineers product managers and UX designers Led a team of junior developers conducted code reviews and ensured adherence to best coding practices Worked closely with the data science team to optimize recommendation algorithms and enhance user experience June 2012 June 2015Software Developer Facebook Menlo Park USA Developed features for the Facebook web application using React Ensured the performance of the MongoDB databases Utilized RESTful APIs for communication between different parts of the application Worked in a fastpaced testdriven development environment Assisted in migrating the legacy system to a modern MERN stack architecture Education 2009 2012 PhD in Computer Science CalTech 
Pasadena USA 2007 2009 Master of Science in Computer Science MIT Cambridge USA 2003 2007 Bachelor of Science in Computer Science UC San Diego San Diego USA 1/2 Projects 2019 PresentPersonal Project Gotham Event Planner Created a fullfeatured web application to plan and organize events in Gotham city Used MERN stack for development and Docker for deployment The application allows users to create manage and share events and integrates with Google Maps API to display event locations 2/2\"]\n", "job_description = \"Job Description Java Developer 3 Years of Experience Tech Solutions San Francisco CA USA About Us At Tech Solutions we believe in the power of technology to solve complex problems We are a dynamic forwardthinking tech company specializing in custom software solutions for various industries We are seeking a talented and experienced Java Developer to join our team Job Description We are seeking a skilled Java Developer with at least 3 years of experience in building highperforming scal able enterprisegrade applications You will be part of a talented software team that works on missioncritical applications Your roles and responsibilities will include managing Java/Java EE application development while providing expertise in the full software development lifecycle Responsibilities •Designing implementing and maintaining Java applications that are often highvolume and low latency required for missioncritical systems •Delivering high availability and performance •Contributing to all phases of the development lifecycle •Writing welldesigned efficient and testable code •Conducting software analysis programming testing and debugging •Ensuring designs comply with specifications •Preparing and producing releases of software components •Supporting continuous improvement by investigating alternatives and technologies and presenting these for architectural review Requirements •BS/MS degree in Computer Science Engineering or a related subject •Proven handson Software Development experience •Proven working experience in Java development •Handson experience in designing and developing applications using Java EE platforms •ObjectOriented Analysis and design using common design patterns •Profound insight of Java and JEE internals Classloading Memory Management Transaction man agement etc 1 •Excellent knowledge of Relational Databases SQL and ORM technologies JPA2 Hibernate •Experience in developing web applications using at least one popular web framework JSF Wicket GWT Spring MVC •Experience with testdriven development Benefits •Competitive salary package •Health dental and vision insurance •Retirement savings plan •Professional development opportunities •Flexible work hours Tech Solutions is proud to be an equal opportunity employer We celebrate diversity and are committed to creating an inclusive environment for all employees How to Apply To apply please submit your resume and a brief explanation of your relevant experience to 2\"\n", + "config = read_config(\"config.yml\")\n", + "if not config:\n", + " print(\"Cannot process this as there is no config.yml\")\n", + "else:\n", + " qdrant_search = QdrantSearch(resumes, job_description)\n", "\n", - "qdrant_search = QdrantSearch(resumes, job_description)\n", + " qdrant_search.update_qdrant()\n", "\n", - "qdrant_search.update_qdrant()\n", - "\n", - "results = qdrant_search.search()\n", - "for r in results:\n", - " print(r)" + " results = qdrant_search.search()\n", + " for r in results:\n", + " print(r)" ], "metadata": { "colab": { "base_uri": 
"https://localhost:8080/" }, "id": "rlP3s5euo435", - "outputId": "3f4f15b6-d446-4491-d4d5-d9ba14a2a145" + "outputId": "389c00e7-8cd1-4dd6-f517-d923e3c4bf2a" }, - "execution_count": null, + "execution_count": 10, "outputs": [ { "output_type": "stream", @@ -162,6 +157,15 @@ ] } ] + }, + { + "cell_type": "code", + "source": [], + "metadata": { + "id": "WFdXngZkEyOm" + }, + "execution_count": null, + "outputs": [] } ] } \ No newline at end of file From 8113b8fd41cafa71f164bd54231bef09079f6275 Mon Sep 17 00:00:00 2001 From: Subramanyam Challa Date: Thu, 10 Aug 2023 09:23:49 +0530 Subject: [PATCH 8/8] added documentation for qdrant and cohere api. --- README.md | 89 +++++++++++++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 40695cd0..e48441d2 100644 --- a/README.md +++ b/README.md @@ -57,65 +57,65 @@ Follow these steps to set up the environment and run the application. 2. Clone the forked repository. - ```bash - git clone https://github.com//Resume-Matcher.git - cd Resume-Matcher - ``` + ```bash + git clone https://github.com//Resume-Matcher.git + cd Resume-Matcher + ``` 3. Create a Python Virtual Environment: - - Using [virtualenv](https://learnpython.com/blog/how-to-use-virtualenv-python/): + - Using [virtualenv](https://learnpython.com/blog/how-to-use-virtualenv-python/): - _Note_: Check how to install virtualenv on your system here [link](https://learnpython.com/blog/how-to-use-virtualenv-python/). + _Note_: Check how to install virtualenv on your system here [link](https://learnpython.com/blog/how-to-use-virtualenv-python/). - ```bash - virtualenv env - ``` + ```bash + virtualenv env + ``` - **OR** + **OR** - - Create a Python Virtual Environment: + - Create a Python Virtual Environment: - ```bash - python -m venv env - ``` + ```bash + python -m venv env + ``` 4. Activate the Virtual Environment. - - On Windows. + - On Windows. - ```bash - env\Scripts\activate - ``` + ```bash + env\Scripts\activate + ``` - - On macOS and Linux. + - On macOS and Linux. - ```bash - source env/bin/activate - ``` + ```bash + source env/bin/activate + ``` 5. Install Dependencies: - ```bash - pip install -r requirements.txt - ``` + ```bash + pip install -r requirements.txt + ``` 6. Prepare Data: - - Resumes: Place your resumes in PDF format in the `Data/Resumes` folder. Remove any existing contents in this folder. - - Job Descriptions: Place your job descriptions in PDF format in the `Data/JobDescription` folder. Remove any existing contents in this folder. + - Resumes: Place your resumes in PDF format in the `Data/Resumes` folder. Remove any existing contents in this folder. + - Job Descriptions: Place your job descriptions in PDF format in the `Data/JobDescription` folder. Remove any existing contents in this folder. 7. Parse Resumes to JSON: - ```python - python run_first.py - ``` + ```python + python run_first.py + ``` 8. Run the Application: - ```python - streamlit run streamlit_app.py - ``` + ```python + streamlit run streamlit_app.py + ``` **Note**: For local versions, you do not need to run "streamlit_second.py" as it is specifically for deploying to Streamlit servers. @@ -127,12 +127,29 @@ Follow these steps to set up the environment and run the application. 1. Build the image and start application - ```bash - docker-compose up - ``` + ```bash + docker-compose up + ``` 2. Open `localhost:80` on your browser +### Cohere and Qdrant + +1. 
Visit the [Cohere registration page](https://dashboard.cohere.ai/welcome/register) and create an account.
+2. Go to API keys and copy your Cohere API key.
+3. Visit the [Qdrant website](https://cloud.qdrant.io/) and create an account.
+4. Get your Qdrant API key and cluster URL as well.
+5. Create a YAML file named `config.yml` in the `scripts/similarity/` folder.
+6. The config file should have the following format:
+    ```yaml
+    cohere:
+      api_key: cohere_key
+    qdrant:
+      api_key: qdrant_api_key
+      url: qdrant_cluster_url
+    ```
+7. Replace the placeholder values with your own keys, without quotes.
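+
+With the config in place, you can sanity-check the scorer directly from Python. The snippet below is only a minimal sketch, not part of the app itself: it assumes you run it from inside the repository, that `run_first.py` has already produced processed JSON files, and the two file names are placeholders for whatever your run generated.
+
+```python
+import json
+import os
+
+from scripts.similarity.get_similarity_score import find_path, get_similarity_score
+
+# Locate the repository root, then the processed documents written by run_first.py.
+cwd = find_path('Resume-Matcher')
+resume_path = os.path.join(cwd, 'Data', 'Processed', 'Resumes', 'your_resume.json')  # placeholder name
+jd_path = os.path.join(cwd, 'Data', 'Processed', 'JobDescription', 'your_jd.json')  # placeholder name
+
+with open(resume_path) as f:
+    resume = json.load(f)
+with open(jd_path) as f:
+    jd = json.load(f)
+
+# The scorer embeds the extracted keywords with Cohere, upserts the resume into a
+# Qdrant collection, and returns a list of {'text': ..., 'score': ...} hits ranked
+# by cosine similarity against the job description.
+resume_string = ' '.join(resume['extracted_keywords'])
+jd_string = ' '.join(jd['extracted_keywords'])
+
+for result in get_similarity_score(resume_string, jd_string):
+    print(result['text'], result['score'])
+```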