From ad4977b652e401d78aa08048f98393c5eceba494 Mon Sep 17 00:00:00 2001
From: saurabhraidev <srbh077@gmail.com>
Date: Tue, 18 Jul 2023 11:08:42 +0530
Subject: [PATCH 1/2] Shifted extra code to archive folder

---
 Data.py => archive/Data.py                    |  16 +-
 {scripts/utils => archive}/ExtraScripts.py    |   0
 output_jobs.txt => archive/output_jobs.txt    |   0
 .../output_resume.txt                         |   0
 run.py => archive/run.py                      |   0
 archive/streamlit_app.py                      | 101 ++++++
 .../test_notebook.ipynb                       |   0
 streamlit_app.py                              | 328 +++++++++++++++---
 streamlit_second.py                           | 323 -----------------
 9 files changed, 391 insertions(+), 377 deletions(-)
 rename Data.py => archive/Data.py (61%)
 rename {scripts/utils => archive}/ExtraScripts.py (100%)
 rename output_jobs.txt => archive/output_jobs.txt (100%)
 rename output_resume.txt => archive/output_resume.txt (100%)
 rename run.py => archive/run.py (100%)
 create mode 100644 archive/streamlit_app.py
 rename test_notebook.ipynb => archive/test_notebook.ipynb (100%)
 delete mode 100644 streamlit_second.py

diff --git a/Data.py b/archive/Data.py
similarity index 61%
rename from Data.py
rename to archive/Data.py
index 111a5c33..aef01a88 100644
--- a/Data.py
+++ b/archive/Data.py
@@ -21,7 +21,21 @@ def build_resume_list(resume_names, path):
     return resumes
 
 
+def build_jobdesc_list(jobdesc_names, path):
+    """Return [{"resume": clean_data}, ...] for each job-description JSON file under path."""
+    jobdescs = []
+    # Loop over the jobdesc_names argument, not the module-level resume_names.
+    for jobdesc in jobdesc_names:
+        selected_file = read_json(path + '/' + jobdesc)
+        jobdescs.append({"resume": selected_file["clean_data"]})  # key kept for compatibility
+    return jobdescs
+
+
 resume_names = get_filenames_from_dir(resume_path)
 resumes = build_resume_list(resume_names, resume_path)
 
-print(resumes)  # To see the output.
+jobdesc_names = get_filenames_from_dir(job_path)
+jobdescs = build_jobdesc_list(jobdesc_names, job_path)
+
+print(resumes)
+print(jobdescs)
diff --git a/scripts/utils/ExtraScripts.py b/archive/ExtraScripts.py
similarity index 100%
rename from scripts/utils/ExtraScripts.py
rename to archive/ExtraScripts.py
diff --git a/output_jobs.txt b/archive/output_jobs.txt
similarity index 100%
rename from output_jobs.txt
rename to archive/output_jobs.txt
diff --git a/output_resume.txt b/archive/output_resume.txt
similarity index 100%
rename from output_resume.txt
rename to archive/output_resume.txt
diff --git a/run.py b/archive/run.py
similarity index 100%
rename from run.py
rename to archive/run.py
diff --git a/archive/streamlit_app.py b/archive/streamlit_app.py
new file mode 100644
index 00000000..c8b2dbfe
--- /dev/null
+++ b/archive/streamlit_app.py
@@ -0,0 +1,101 @@
+import string
+import spacy
+import pywaffle
+import streamlit as st
+import pandas as pd
+import json
+import plotly.express as px
+import plotly.graph_objects as go
+import matplotlib.pyplot as plt
+import squarify
+
+st.title('Resume :blue[Matcher]')
+st.image('Assets/img/header_image.jpg')
+st.subheader('_AI Based Resume Analyzer & Ranker_')
+
+
+def read_json(filename):
+    with open(filename) as f:
+        data = json.load(f)
+    return data
+
+
+# read the json file
+resume = read_json(
+    'Data/Processed/Resume-d531571e-e4fa-45eb-ab6a-267cdeb6647e.json')
+job_desc = read_json(
+    'Data/Processed/Job-Desc-a4f06ccb-8d5a-4d0b-9f02-3ba6d686472e.json')
+
+st.write("### Reading Resume's POS")
+df = pd.DataFrame(resume['pos_frequencies'], index=[0])
+fig = go.Figure(data=go.Bar(y=list(resume['pos_frequencies'].values()), x=list(resume['pos_frequencies'].keys())),
+                layout_title_text="Resume's POS")
+st.write(fig)
+
+df2 = pd.DataFrame(resume['keyterms'], columns=["keyword", "value"])
+st.dataframe(df2)
+
+# Create the dictionary
+keyword_dict = {}
+for keyword, value in resume['keyterms']:
+    keyword_dict[keyword] = value
+
+fig = go.Figure(data=[go.Table(header=dict(values=["Keyword", "Value"],
+                                           font=dict(size=12),
+                                           fill_color='#070A52'),
+                               cells=dict(values=[list(keyword_dict.keys()),
+                                                  list(keyword_dict.values())],
+                                          line_color='darkslategray',
+                                          fill_color='#6DA9E4'))
+                      ])
+st.plotly_chart(fig)
+
+st.divider()
+
+for keyword, value in resume['keyterms']:
+    pass
+
+
+# display the waffle chart
+figure = plt.figure(
+    FigureClass=pywaffle.Waffle,
+    rows=20,
+    columns=20,
+    values=keyword_dict,
+    legend={'loc': 'upper left', 'bbox_to_anchor': (1, 1)})
+
+
+# Display the dictionary
+
+st.pyplot(fig=figure)
+# st.write(dict)
+
+fig = px.treemap(df2, path=['keyword'], values='value',
+                 color_continuous_scale='RdBu',
+                 title='Resume POS')
+st.write(fig)
+
+
+st.plotly_chart(figure_or_data=fig)
+
+fig = go.Figure(data=[go.Table(
+    header=dict(values=["Tri Grams"],
+                fill_color='#1D267D',
+                align='center', font=dict(color='white', size=16)),
+    cells=dict(values=[resume['tri_grams']],
+               fill_color='#19A7CE',
+               align='left'))])
+
+st.plotly_chart(figure_or_data=fig)
+
+fig = go.Figure(data=[go.Table(
+    header=dict(values=["Bi Grams"],
+                fill_color='#1D267D',
+                align='center', font=dict(color='white', size=16)),
+    cells=dict(values=[resume['bi_grams']],
+               fill_color='#19A7CE',
+               align='left'))])
+
+st.plotly_chart(figure_or_data=fig)
+
+
diff --git a/test_notebook.ipynb b/archive/test_notebook.ipynb
similarity index 100%
rename from test_notebook.ipynb
rename to archive/test_notebook.ipynb
diff --git a/streamlit_app.py b/streamlit_app.py
index c8b2dbfe..d663934b 100644
--- a/streamlit_app.py
+++ b/streamlit_app.py
@@ -1,17 +1,105 @@
-import string
-import spacy
-import pywaffle
+import networkx as nx
+from typing import List
 import streamlit as st
 import pandas as pd
 import json
 import plotly.express as px
 import plotly.graph_objects as go
-import matplotlib.pyplot as plt
-import squarify
+from scripts.utils.ReadFiles import get_filenames_from_dir
+from streamlit_extras import add_vertical_space as avs
+from annotated_text import annotated_text, parameters
+import nltk
+nltk.download('punkt')
 
-st.title('Resume :blue[Matcher]')
-st.image('Assets/img/header_image.jpg')
-st.subheader('_AI Based Resume Analyzer & Ranker_')
+parameters.SHOW_LABEL_SEPARATOR = False
+parameters.BORDER_RADIUS = 3
+parameters.PADDING = "0.5 0.25rem"
+
+
+def create_star_graph(nodes_and_weights, title):
+    # Create an empty graph
+    G = nx.Graph()
+
+    # Add the central node
+    central_node = "resume"
+    G.add_node(central_node)
+
+    # Add nodes and edges with weights to the graph
+    for node, weight in nodes_and_weights:
+        G.add_node(node)
+        G.add_edge(central_node, node, weight=weight*100)
+
+    # Get position layout for nodes
+    pos = nx.spring_layout(G)
+
+    # Create edge trace
+    edge_x = []
+    edge_y = []
+    for edge in G.edges():
+        x0, y0 = pos[edge[0]]
+        x1, y1 = pos[edge[1]]
+        edge_x.extend([x0, x1, None])
+        edge_y.extend([y0, y1, None])
+
+    edge_trace = go.Scatter(x=edge_x, y=edge_y, line=dict(
+        width=0.5, color='#888'), hoverinfo='none', mode='lines')
+
+    # Create node trace
+    node_x = []
+    node_y = []
+    for node in G.nodes():
+        x, y = pos[node]
+        node_x.append(x)
+        node_y.append(y)
+
+    node_trace = go.Scatter(x=node_x, y=node_y, mode='markers', hoverinfo='text',
+                            marker=dict(showscale=True, colorscale='Rainbow', reversescale=True, color=[], size=10,
+                                        colorbar=dict(thickness=15, title='Node Connections', xanchor='left',
+                                                      titleside='right'), line_width=2))
+
+    # Color node points by number of connections
+    node_adjacencies = []
+    node_text = []
+    for node in G.nodes():
+        adjacencies = list(G.adj[node])  # changes here
+        node_adjacencies.append(len(adjacencies))
+        node_text.append(f'{node}<br># of connections: {len(adjacencies)}')
+
+    node_trace.marker.color = node_adjacencies
+    node_trace.text = node_text
+
+    # Create the figure
+    fig = go.Figure(data=[edge_trace, node_trace],
+                    layout=go.Layout(title=title, titlefont_size=16, showlegend=False,
+                                     hovermode='closest', margin=dict(b=20, l=5, r=5, t=40),
+                                     xaxis=dict(
+                                         showgrid=False, zeroline=False, showticklabels=False),
+                                     yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
+
+    # Show the figure
+    st.plotly_chart(fig)
+
+
+def create_annotated_text(input_string: str, word_list: List[str], annotation: str, color_code: str):
+    # Tokenize the input string
+    tokens = nltk.word_tokenize(input_string)
+
+    # Convert the list to a set for quick lookups
+    word_set = set(word_list)
+
+    # Initialize an empty list to hold the annotated text
+    annotated_text = []
+
+    for token in tokens:
+        # Check if the token is in the set
+        if token in word_set:
+            # If it is, append a tuple with the token, annotation, and color code
+            annotated_text.append((token, annotation, color_code))
+        else:
+            # If it's not, just append the token as a string
+            annotated_text.append(token)
+
+    return annotated_text
 
 
 def read_json(filename):
@@ -20,25 +108,54 @@ def read_json(filename):
     return data
 
 
-# read the json file
-resume = read_json(
-    'Data/Processed/Resume-d531571e-e4fa-45eb-ab6a-267cdeb6647e.json')
-job_desc = read_json(
-    'Data/Processed/Job-Desc-a4f06ccb-8d5a-4d0b-9f02-3ba6d686472e.json')
+def tokenize_string(input_string):
+    tokens = nltk.word_tokenize(input_string)
+    return tokens
 
-st.write("### Reading Resume's POS")
-df = pd.DataFrame(resume['pos_frequencies'], index=[0])
-fig = go.Figure(data=go.Bar(y=list(resume['pos_frequencies'].values()), x=list(resume['pos_frequencies'].keys())),
-                layout_title_text="Resume's POS")
-st.write(fig)
 
-df2 = pd.DataFrame(resume['keyterms'], columns=["keyword", "value"])
-st.dataframe(df2)
+st.image('Assets/img/header_image.jpg')
+
+avs.add_vertical_space(5)
+
+resume_names = get_filenames_from_dir("Data/Processed/Resumes")
+
+st.write("There are", len(resume_names),
+         " resumes present. Please select one from the menu below:")
+output = st.slider('Select Resume Number', 0, len(resume_names)-1, 2)
+
+avs.add_vertical_space(5)
+
+st.write("You have selected ", resume_names[output], " printing the resume")
+selected_file = read_json("Data/Processed/Resumes/"+resume_names[output])
+
+avs.add_vertical_space(2)
+st.markdown("#### Parsed Resume Data")
+st.caption(
+    "This text is parsed from your resume. This is how it'll look like after getting parsed by an ATS.")
+st.caption("Utilize this to understand how to make your resume ATS friendly.")
+avs.add_vertical_space(3)
+# st.json(selected_file)
+st.write(selected_file["clean_data"])
+
+avs.add_vertical_space(3)
+st.write("Now let's take a look at the extracted keywords from the resume.")
+
+annotated_text(create_annotated_text(
+    selected_file["clean_data"], selected_file["extracted_keywords"],
+    "KW", "#0B666A"))
+
+avs.add_vertical_space(5)
+st.write("Now let's take a look at the extracted entities from the resume.")
+
+# Call the function with your data
+create_star_graph(selected_file['keyterms'], "Entities from Resume")
+
+df2 = pd.DataFrame(selected_file['keyterms'], columns=["keyword", "value"])
 
 # Create the dictionary
 keyword_dict = {}
-for keyword, value in resume['keyterms']:
-    keyword_dict[keyword] = value
+for keyword, value in selected_file['keyterms']:
+    keyword_dict[keyword] = value*100
 
 fig = go.Figure(data=[go.Table(header=dict(values=["Keyword", "Value"],
                                            font=dict(size=12),
@@ -52,50 +169,155 @@ def read_json(filename):
 
 st.divider()
 
-for keyword, value in resume['keyterms']:
-    pass
+fig = px.treemap(df2, path=['keyword'], values='value',
+                 color_continuous_scale='Rainbow',
+                 title='Key Terms/Topics Extracted from your Resume')
+st.write(fig)
+
+avs.add_vertical_space(5)
+
+job_descriptions = get_filenames_from_dir("Data/Processed/JobDescription")
+
+st.write("There are", len(job_descriptions),
+         " resumes present. Please select one from the menu below:")
+output = st.slider('Select Job Description Number',
+                   0, len(job_descriptions)-1, 2)
+
+avs.add_vertical_space(5)
+
+st.write("You have selected ",
+         job_descriptions[output], " printing the job description")
+selected_jd = read_json(
+    "Data/Processed/JobDescription/"+job_descriptions[output])
 
+avs.add_vertical_space(2)
+st.markdown("#### Job Description")
+st.caption(
+    "Currently in the pipeline I'm parsing this from PDF but it'll be from txt or copy paste.")
+avs.add_vertical_space(3)
+# st.json(selected_file)
+st.write(selected_jd["clean_data"])
 
-# display the waffle chart
-figure = plt.figure(
-    FigureClass=pywaffle.Waffle,
-    rows=20,
-    columns=20,
-    values=keyword_dict,
-    legend={'loc': 'upper left', 'bbox_to_anchor': (1, 1)})
+st.markdown("#### Common Words between Job Description and Resumes Highlighted.")
 
+annotated_text(create_annotated_text(
+    selected_file["clean_data"], selected_jd["extracted_keywords"],
+    "JD", "#F24C3D"))
 
-# Display the dictionary
+st.write("Now let's take a look at the extracted entities from the job description.")
 
-st.pyplot(fig=figure)
-# st.write(dict)
+# Call the function with your data
+create_star_graph(selected_jd['keyterms'], "Entities from Job Description")
+
+df2 = pd.DataFrame(selected_jd['keyterms'], columns=["keyword", "value"])
+
+# Create the dictionary
+keyword_dict = {}
+for keyword, value in selected_jd['keyterms']:
+    keyword_dict[keyword] = value*100
+
+fig = go.Figure(data=[go.Table(header=dict(values=["Keyword", "Value"],
+                                           font=dict(size=12),
+                                           fill_color='#070A52'),
+                               cells=dict(values=[list(keyword_dict.keys()),
+                                                  list(keyword_dict.values())],
+                                          line_color='darkslategray',
+                                          fill_color='#6DA9E4'))
+                      ])
+st.plotly_chart(fig)
+
+st.divider()
 
 fig = px.treemap(df2, path=['keyword'], values='value',
-                 color_continuous_scale='RdBu',
-                 title='Resume POS')
+                 color_continuous_scale='Rainbow',
+                 title='Key Terms/Topics Extracted from the selected Job Description')
 st.write(fig)
 
+avs.add_vertical_space(5)
+
+st.divider()
+
+st.markdown("## Vector Similarity Scores")
+st.caption("Powered by Qdrant Vector Search")
+st.info("These are pre-computed queries", icon="ℹ")
+st.warning(
+    "Running Qdrant or Sentence Transformers without having capacity is not recommended", icon="⚠")
+
+
+# Your data
+data = [
+    {'text': "{'resume': 'Alfred Pennyworth",
+        'query': 'Job Description Product Manager', 'score': 0.62658},
+    {'text': "{'resume': 'Barry Allen",
+        'query': 'Job Description Product Manager', 'score': 0.43777737},
+    {'text': "{'resume': 'Bruce Wayne ",
+        'query': 'Job Description Product Manager', 'score': 0.39835533},
+    {'text': "{'resume': 'JOHN DOE",
+        'query': 'Job Description Product Manager', 'score': 0.3915512},
+    {'text': "{'resume': 'Harvey Dent",
+        'query': 'Job Description Product Manager', 'score': 0.3519544},
+    {'text': "{'resume': 'Barry Allen",
+        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.6541866},
+    {'text': "{'resume': 'Alfred Pennyworth",
+        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.59806436},
+    {'text': "{'resume': 'JOHN DOE",
+        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.5951386},
+    {'text': "{'resume': 'Bruce Wayne ",
+        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.57700855},
+    {'text': "{'resume': 'Harvey Dent",
+        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.38489106},
+    {'text': "{'resume': 'Barry Allen",
+        'query': 'Job Description Front End Engineer', 'score': 0.76813436},
+    {'text': "{'resume': 'Bruce Wayne'",
+        'query': 'Job Description Front End Engineer', 'score': 0.60440844},
+    {'text': "{'resume': 'JOHN DOE",
+        'query': 'Job Description Front End Engineer', 'score': 0.56080043},
+    {'text': "{'resume': 'Alfred Pennyworth",
+        'query': 'Job Description Front End Engineer', 'score': 0.5395049},
+    {'text': "{'resume': 'Harvey Dent",
+        'query': 'Job Description Front End Engineer', 'score': 0.3859515},
+    {'text': "{'resume': 'JOHN DOE",
+        'query': 'Job Description Java Developer', 'score': 0.5449441},
+    {'text': "{'resume': 'Alfred Pennyworth",
+        'query': 'Job Description Java Developer', 'score': 0.53476423},
+    {'text': "{'resume': 'Barry Allen",
+        'query': 'Job Description Java Developer', 'score': 0.5313871},
+    {'text': "{'resume': 'Bruce Wayne ",
+        'query': 'Job Description Java Developer', 'score': 0.44446343},
+    {'text': "{'resume': 'Harvey Dent",
+        'query': 'Job Description Java Developer', 'score': 0.3616274}
+]
+
+# Create a DataFrame
+df = pd.DataFrame(data)
+
+# Create different DataFrames based on the query and sort by score
+df1 = df[df['query'] ==
+         'Job Description Product Manager'].sort_values(by='score', ascending=False)
+df2 = df[df['query'] ==
+         'Job Description Senior Full Stack Engineer'].sort_values(by='score', ascending=False)
+df3 = df[df['query'] == 'Job Description Front End Engineer'].sort_values(
+    by='score', ascending=False)
+df4 = df[df['query'] == 'Job Description Java Developer'].sort_values(
+    by='score', ascending=False)
 
-st.plotly_chart(figure_or_data=fig)
 
-fig = go.Figure(data=[go.Table(
-    header=dict(values=["Tri Grams"],
-                fill_color='#1D267D',
-                align='center', font=dict(color='white', size=16)),
-    cells=dict(values=[resume['tri_grams']],
-               fill_color='#19A7CE',
-               align='left'))])
+def plot_df(df, title):
+    fig = px.bar(df, x='text', y=df['score']*100, title=title)
+    st.plotly_chart(fig)
 
-st.plotly_chart(figure_or_data=fig)
 
-fig = go.Figure(data=[go.Table(
-    header=dict(values=["Bi Grams"],
-                fill_color='#1D267D',
-                align='center', font=dict(color='white', size=16)),
-    cells=dict(values=[resume['bi_grams']],
-               fill_color='#19A7CE',
-               align='left'))])
+st.markdown("### Bar plots of scores based on similarity to Job Description.")
 
-st.plotly_chart(figure_or_data=fig)
+st.subheader(":blue[Legend]")
+st.text("Alfred Pennyworth :  Product Manager")
+st.text("Barry Allen :  Front End Developer")
+st.text("Harvey Dent :  Machine Learning Engineer")
+st.text("Bruce Wayne :  Fullstack Developer (MERN)")
+st.text("John Doe :  Fullstack Developer (Java)")
 
 
+plot_df(df1, 'Job Description Product Manager 10+ Years of Exper')
+plot_df(df2, 'Job Description Senior Full Stack Engineer 5+ Year')
+plot_df(df3, 'Job Description Front End Engineer 2 Years of Expe')
+plot_df(df4, 'Job Description Java Developer 3 Years of Experien')
diff --git a/streamlit_second.py b/streamlit_second.py
deleted file mode 100644
index d663934b..00000000
--- a/streamlit_second.py
+++ /dev/null
@@ -1,323 +0,0 @@
-import networkx as nx
-from typing import List
-import streamlit as st
-import pandas as pd
-import json
-import plotly.express as px
-import plotly.graph_objects as go
-from scripts.utils.ReadFiles import get_filenames_from_dir
-from streamlit_extras import add_vertical_space as avs
-from annotated_text import annotated_text, parameters
-import nltk
-nltk.download('punkt')
-
-parameters.SHOW_LABEL_SEPARATOR = False
-parameters.BORDER_RADIUS = 3
-parameters.PADDING = "0.5 0.25rem"
-
-
-def create_star_graph(nodes_and_weights, title):
-    # Create an empty graph
-    G = nx.Graph()
-
-    # Add the central node
-    central_node = "resume"
-    G.add_node(central_node)
-
-    # Add nodes and edges with weights to the graph
-    for node, weight in nodes_and_weights:
-        G.add_node(node)
-        G.add_edge(central_node, node, weight=weight*100)
-
-    # Get position layout for nodes
-    pos = nx.spring_layout(G)
-
-    # Create edge trace
-    edge_x = []
-    edge_y = []
-    for edge in G.edges():
-        x0, y0 = pos[edge[0]]
-        x1, y1 = pos[edge[1]]
-        edge_x.extend([x0, x1, None])
-        edge_y.extend([y0, y1, None])
-
-    edge_trace = go.Scatter(x=edge_x, y=edge_y, line=dict(
-        width=0.5, color='#888'), hoverinfo='none', mode='lines')
-
-    # Create node trace
-    node_x = []
-    node_y = []
-    for node in G.nodes():
-        x, y = pos[node]
-        node_x.append(x)
-        node_y.append(y)
-
-    node_trace = go.Scatter(x=node_x, y=node_y, mode='markers', hoverinfo='text',
-                            marker=dict(showscale=True, colorscale='Rainbow', reversescale=True, color=[], size=10,
-                                        colorbar=dict(thickness=15, title='Node Connections', xanchor='left',
-                                                      titleside='right'), line_width=2))
-
-    # Color node points by number of connections
-    node_adjacencies = []
-    node_text = []
-    for node in G.nodes():
-        adjacencies = list(G.adj[node])  # changes here
-        node_adjacencies.append(len(adjacencies))
-        node_text.append(f'{node}<br># of connections: {len(adjacencies)}')
-
-    node_trace.marker.color = node_adjacencies
-    node_trace.text = node_text
-
-    # Create the figure
-    fig = go.Figure(data=[edge_trace, node_trace],
-                    layout=go.Layout(title=title, titlefont_size=16, showlegend=False,
-                                     hovermode='closest', margin=dict(b=20, l=5, r=5, t=40),
-                                     xaxis=dict(
-                                         showgrid=False, zeroline=False, showticklabels=False),
-                                     yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
-
-    # Show the figure
-    st.plotly_chart(fig)
-
-
-def create_annotated_text(input_string: str, word_list: List[str], annotation: str, color_code: str):
-    # Tokenize the input string
-    tokens = nltk.word_tokenize(input_string)
-
-    # Convert the list to a set for quick lookups
-    word_set = set(word_list)
-
-    # Initialize an empty list to hold the annotated text
-    annotated_text = []
-
-    for token in tokens:
-        # Check if the token is in the set
-        if token in word_set:
-            # If it is, append a tuple with the token, annotation, and color code
-            annotated_text.append((token, annotation, color_code))
-        else:
-            # If it's not, just append the token as a string
-            annotated_text.append(token)
-
-    return annotated_text
-
-
-def read_json(filename):
-    with open(filename) as f:
-        data = json.load(f)
-    return data
-
-
-def tokenize_string(input_string):
-    tokens = nltk.word_tokenize(input_string)
-    return tokens
-
-
-st.image('Assets/img/header_image.jpg')
-
-avs.add_vertical_space(5)
-
-resume_names = get_filenames_from_dir("Data/Processed/Resumes")
-
-st.write("There are", len(resume_names),
-         " resumes present. Please select one from the menu below:")
-output = st.slider('Select Resume Number', 0, len(resume_names)-1, 2)
-
-avs.add_vertical_space(5)
-
-st.write("You have selected ", resume_names[output], " printing the resume")
-selected_file = read_json("Data/Processed/Resumes/"+resume_names[output])
-
-avs.add_vertical_space(2)
-st.markdown("#### Parsed Resume Data")
-st.caption(
-    "This text is parsed from your resume. This is how it'll look like after getting parsed by an ATS.")
-st.caption("Utilize this to understand how to make your resume ATS friendly.")
-avs.add_vertical_space(3)
-# st.json(selected_file)
-st.write(selected_file["clean_data"])
-
-avs.add_vertical_space(3)
-st.write("Now let's take a look at the extracted keywords from the resume.")
-
-annotated_text(create_annotated_text(
-    selected_file["clean_data"], selected_file["extracted_keywords"],
-    "KW", "#0B666A"))
-
-avs.add_vertical_space(5)
-st.write("Now let's take a look at the extracted entities from the resume.")
-
-# Call the function with your data
-create_star_graph(selected_file['keyterms'], "Entities from Resume")
-
-df2 = pd.DataFrame(selected_file['keyterms'], columns=["keyword", "value"])
-
-# Create the dictionary
-keyword_dict = {}
-for keyword, value in selected_file['keyterms']:
-    keyword_dict[keyword] = value*100
-
-fig = go.Figure(data=[go.Table(header=dict(values=["Keyword", "Value"],
-                                           font=dict(size=12),
-                                           fill_color='#070A52'),
-                               cells=dict(values=[list(keyword_dict.keys()),
-                                                  list(keyword_dict.values())],
-                                          line_color='darkslategray',
-                                          fill_color='#6DA9E4'))
-                      ])
-st.plotly_chart(fig)
-
-st.divider()
-
-fig = px.treemap(df2, path=['keyword'], values='value',
-                 color_continuous_scale='Rainbow',
-                 title='Key Terms/Topics Extracted from your Resume')
-st.write(fig)
-
-avs.add_vertical_space(5)
-
-job_descriptions = get_filenames_from_dir("Data/Processed/JobDescription")
-
-st.write("There are", len(job_descriptions),
-         " resumes present. Please select one from the menu below:")
-output = st.slider('Select Job Description Number',
-                   0, len(job_descriptions)-1, 2)
-
-avs.add_vertical_space(5)
-
-st.write("You have selected ",
-         job_descriptions[output], " printing the job description")
-selected_jd = read_json(
-    "Data/Processed/JobDescription/"+job_descriptions[output])
-
-avs.add_vertical_space(2)
-st.markdown("#### Job Description")
-st.caption(
-    "Currently in the pipeline I'm parsing this from PDF but it'll be from txt or copy paste.")
-avs.add_vertical_space(3)
-# st.json(selected_file)
-st.write(selected_jd["clean_data"])
-
-st.markdown("#### Common Words between Job Description and Resumes Highlighted.")
-
-annotated_text(create_annotated_text(
-    selected_file["clean_data"], selected_jd["extracted_keywords"],
-    "JD", "#F24C3D"))
-
-st.write("Now let's take a look at the extracted entities from the job description.")
-
-# Call the function with your data
-create_star_graph(selected_jd['keyterms'], "Entities from Job Description")
-
-df2 = pd.DataFrame(selected_jd['keyterms'], columns=["keyword", "value"])
-
-# Create the dictionary
-keyword_dict = {}
-for keyword, value in selected_jd['keyterms']:
-    keyword_dict[keyword] = value*100
-
-fig = go.Figure(data=[go.Table(header=dict(values=["Keyword", "Value"],
-                                           font=dict(size=12),
-                                           fill_color='#070A52'),
-                               cells=dict(values=[list(keyword_dict.keys()),
-                                                  list(keyword_dict.values())],
-                                          line_color='darkslategray',
-                                          fill_color='#6DA9E4'))
-                      ])
-st.plotly_chart(fig)
-
-st.divider()
-
-fig = px.treemap(df2, path=['keyword'], values='value',
-                 color_continuous_scale='Rainbow',
-                 title='Key Terms/Topics Extracted from the selected Job Description')
-st.write(fig)
-
-avs.add_vertical_space(5)
-
-st.divider()
-
-st.markdown("## Vector Similarity Scores")
-st.caption("Powered by Qdrant Vector Search")
-st.info("These are pre-computed queries", icon="ℹ")
-st.warning(
-    "Running Qdrant or Sentence Transformers without having capacity is not recommended", icon="⚠")
-
-
-# Your data
-data = [
-    {'text': "{'resume': 'Alfred Pennyworth",
-        'query': 'Job Description Product Manager', 'score': 0.62658},
-    {'text': "{'resume': 'Barry Allen",
-        'query': 'Job Description Product Manager', 'score': 0.43777737},
-    {'text': "{'resume': 'Bruce Wayne ",
-        'query': 'Job Description Product Manager', 'score': 0.39835533},
-    {'text': "{'resume': 'JOHN DOE",
-        'query': 'Job Description Product Manager', 'score': 0.3915512},
-    {'text': "{'resume': 'Harvey Dent",
-        'query': 'Job Description Product Manager', 'score': 0.3519544},
-    {'text': "{'resume': 'Barry Allen",
-        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.6541866},
-    {'text': "{'resume': 'Alfred Pennyworth",
-        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.59806436},
-    {'text': "{'resume': 'JOHN DOE",
-        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.5951386},
-    {'text': "{'resume': 'Bruce Wayne ",
-        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.57700855},
-    {'text': "{'resume': 'Harvey Dent",
-        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.38489106},
-    {'text': "{'resume': 'Barry Allen",
-        'query': 'Job Description Front End Engineer', 'score': 0.76813436},
-    {'text': "{'resume': 'Bruce Wayne'",
-        'query': 'Job Description Front End Engineer', 'score': 0.60440844},
-    {'text': "{'resume': 'JOHN DOE",
-        'query': 'Job Description Front End Engineer', 'score': 0.56080043},
-    {'text': "{'resume': 'Alfred Pennyworth",
-        'query': 'Job Description Front End Engineer', 'score': 0.5395049},
-    {'text': "{'resume': 'Harvey Dent",
-        'query': 'Job Description Front End Engineer', 'score': 0.3859515},
-    {'text': "{'resume': 'JOHN DOE",
-        'query': 'Job Description Java Developer', 'score': 0.5449441},
-    {'text': "{'resume': 'Alfred Pennyworth",
-        'query': 'Job Description Java Developer', 'score': 0.53476423},
-    {'text': "{'resume': 'Barry Allen",
-        'query': 'Job Description Java Developer', 'score': 0.5313871},
-    {'text': "{'resume': 'Bruce Wayne ",
-        'query': 'Job Description Java Developer', 'score': 0.44446343},
-    {'text': "{'resume': 'Harvey Dent",
-        'query': 'Job Description Java Developer', 'score': 0.3616274}
-]
-
-# Create a DataFrame
-df = pd.DataFrame(data)
-
-# Create different DataFrames based on the query and sort by score
-df1 = df[df['query'] ==
-         'Job Description Product Manager'].sort_values(by='score', ascending=False)
-df2 = df[df['query'] ==
-         'Job Description Senior Full Stack Engineer'].sort_values(by='score', ascending=False)
-df3 = df[df['query'] == 'Job Description Front End Engineer'].sort_values(
-    by='score', ascending=False)
-df4 = df[df['query'] == 'Job Description Java Developer'].sort_values(
-    by='score', ascending=False)
-
-
-def plot_df(df, title):
-    fig = px.bar(df, x='text', y=df['score']*100, title=title)
-    st.plotly_chart(fig)
-
-
-st.markdown("### Bar plots of scores based on similarity to Job Description.")
-
-st.subheader(":blue[Legend]")
-st.text("Alfred Pennyworth :  Product Manager")
-st.text("Barry Allen :  Front End Developer")
-st.text("Harvey Dent :  Machine Learning Engineer")
-st.text("Bruce Wayne :  Fullstack Developer (MERN)")
-st.text("John Doe :  Fullstack Developer (Java)")
-
-
-plot_df(df1, 'Job Description Product Manager 10+ Years of Exper')
-plot_df(df2, 'Job Description Senior Full Stack Engineer 5+ Year')
-plot_df(df3, 'Job Description Front End Engineer 2 Years of Expe')
-plot_df(df4, 'Job Description Java Developer 3 Years of Experien')

From 0c27815fb06721d1b6560488beda3e8307c9a4d7 Mon Sep 17 00:00:00 2001
From: saurabhraidev <srbh077@gmail.com>
Date: Tue, 18 Jul 2023 12:01:20 +0530
Subject: [PATCH 2/2] Updated streamlit_apps  &  readme.

---
 README.md           |  66 ++++++---
 streamlit_app.py    | 114 ++++----------
 streamlit_second.py | 354 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 428 insertions(+), 106 deletions(-)
 create mode 100644 streamlit_second.py

diff --git a/README.md b/README.md
index 4c46d27d..e0b1cbd2 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@
 
 # Resume Matcher
 
-## AI Based Resume Matcher to tailor your resume to a job description. Find the bestkewords, and gain deep insights into your resume.
+## AI Based Free & Open Source ATS, Resume Matcher to tailor your resume to a job description. Find the best keywords, and gain deep insights into your resume.
 
 </div>
 
@@ -19,37 +19,61 @@
 
 [![Resume Matcher](https://custom-icon-badges.demolab.com/badge/www.resumematcher.fyi-gold?style=for-the-badge&logo=globe&logoColor=black)](https://www.resumematcher.fyi)
 
+[![Live Demo](https://custom-icon-badges.demolab.com/badge/live-demo-red?style=for-the-badge&logo=globe&logoColor=black)](https://resume-matcher.streamlit.app/)
+
 </div>
 
-A Machine Learning Based Resume Matcher, to compare Resumes with Job Descriptions.
-Create a score based on how good/similar a resume is to the particular Job Description.\n
-Documents are sorted based on Their TF-IDF Scores (Term Frequency-Inverse Document Frequency)
+### How does It work?
+
+The Resume Matcher takes your resume and job descriptions as input, parses them using Python, and mimics the functionalities of an ATS, providing you with insights and suggestions to make your resume ATS-friendly.
+
+The process is as follows:
+
+1. **Parsing**: The system uses Python to parse both your resume and the provided job description, just like an ATS would. Parsing is critical as it transforms your documents into a format the system can readily analyze.
+
+2. **Keyword Extraction**: The tool uses advanced machine learning algorithms to extract the most relevant keywords from the job description. These keywords represent the skills, qualifications, and experiences the employer seeks.
+
+3. **Key Terms Extraction**: Beyond keyword extraction, the tool uses textacy to identify the main key terms or themes in the job description. This step helps in understanding the broader context of what the resume is about.
+
+4. **Vector Similarity Using Qdrant**: The tool uses Qdrant, a highly efficient vector similarity search tool, to measure how closely your resume matches the job description. This process is done by representing your resume and job description as vectors in a high-dimensional space and calculating their cosine similarity. The more similar they are, the higher the likelihood that your resume will pass the ATS screening.
+
+On top of that, there are various data visualizations that I've added to help you get started.
 
-Matching Algorihms used are :-
+#### PRs Welcomed 🤗
 
--   **String Matching**
+<br/>
+
+---
+
+<div align="center">
+
+## How to install
 
-    -   Monge Elkan
+</div>
 
--   **Token Based**
-    -   Jaccard
-    -   Cosine
-    -   Sorensen-Dice
-    -   Overlap Coefficient
+1. Clone the project.
+2. Create a python virtual environment.
+3. Activate the virtual environment.
+4. Do `pip install -r requirements.txt` to install all dependencies.
+5. Put your resumes in PDF Format in the `Data/Resumes` folder. (Delete the existing contents)
+6. Put your Job Descriptions in PDF Format in `Data/JobDescription` folder. (Delete the existing contents)
+7. Run `python run_first.py`; this will parse all the resumes to JSON.
+8. Run `streamlit run streamlit_app.py`.
 
-Topic Modelling of Resumes is done to provide additional information about the resumes and what clusters/topics,
-the belong to.
-For this :-
+**Note**: For local versions, don't run `streamlit_second.py`; it's meant for deploying to Streamlit.
 
-1. [TF-IDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf) of resumes is done to improve the sentence similarities. As it helps reduce the redundant terms and brings out the important ones.
-2. id2word, and doc2word algorithms are used on the Documents (from Gensim Library).
-3. [LDA](https://en.wikipedia.org/wiki/Latent_Dirichlet_allocation) (Latent Dirichlet Allocation) is done to extract the Topics from the Document set.(In this case Resumes)
-4. Additional Plots are done to gain more insights about the document.
+Note: The vector similarity part is precomputed here, as sentence encoders require a heavy GPU and a lot of memory (RAM). I am working on a blog post that will show how you can leverage that in a Google Colab environment for free.
 
 <br/>
 
 ---
 
-### Older Version
+### Note 📝
+
+Thanks for the support 💙. This is an ongoing project that I want to build with the open source community. There are many ways in which this tool can be upgraded. These include (but are not limited to):
 
-Check the older version of the project [**here**](https://github.com/srbhr/Naive-Resume-Matching/blob/master/README.md).
+-   Create a better dashboard instead of Streamlit.
+-   Add more features like uploading of resumes and parsing.
+-   Add a Docker image for easy usage.
+-   Contribute to a better parsing algorithm.
+-   Contribute to a blog on how to make this work.
diff --git a/streamlit_app.py b/streamlit_app.py
index d663934b..8fca3362 100644
--- a/streamlit_app.py
+++ b/streamlit_app.py
@@ -8,6 +8,7 @@
 from scripts.utils.ReadFiles import get_filenames_from_dir
 from streamlit_extras import add_vertical_space as avs
 from annotated_text import annotated_text, parameters
+from streamlit_extras.badges import badge
 import nltk
 nltk.download('punkt')
 
@@ -115,6 +116,22 @@ def tokenize_string(input_string):
 
 st.image('Assets/img/header_image.jpg')
 
+st.title(':blue[Resume Matcher]')
+st.subheader(
+    'Free and Open Source ATS to help your resume pass the screening stage.')
+st.markdown(
+    "Check the website [www.resumematcher.fyi](https://www.resumematcher.fyi/)")
+st.markdown(
+    '⭐ Give Resume Matcher a Star on [GitHub](https://github.com/srbhr/Naive-Resume-Matching/)')
+badge(type="github", name="srbhr/Naive-Resume-Matching")
+
+st.text('For updates follow me on Twitter.')
+badge(type="twitter", name="_srbhr_")
+
+st.markdown(
+    'If you like the project and would like to further help in development please consider 👇')
+badge(type="buymeacoffee", name="srbhr")
+
 avs.add_vertical_space(5)
 
 resume_names = get_filenames_from_dir("Data/Processed/Resumes")
@@ -233,91 +250,18 @@ def tokenize_string(input_string):
                  title='Key Terms/Topics Extracted from the selected Job Description')
 st.write(fig)
 
-avs.add_vertical_space(5)
-
-st.divider()
-
-st.markdown("## Vector Similarity Scores")
-st.caption("Powered by Qdrant Vector Search")
-st.info("These are pre-computed queries", icon="ℹ")
-st.warning(
-    "Running Qdrant or Sentence Transformers without having capacity is not recommended", icon="⚠")
-
-
-# Your data
-data = [
-    {'text': "{'resume': 'Alfred Pennyworth",
-        'query': 'Job Description Product Manager', 'score': 0.62658},
-    {'text': "{'resume': 'Barry Allen",
-        'query': 'Job Description Product Manager', 'score': 0.43777737},
-    {'text': "{'resume': 'Bruce Wayne ",
-        'query': 'Job Description Product Manager', 'score': 0.39835533},
-    {'text': "{'resume': 'JOHN DOE",
-        'query': 'Job Description Product Manager', 'score': 0.3915512},
-    {'text': "{'resume': 'Harvey Dent",
-        'query': 'Job Description Product Manager', 'score': 0.3519544},
-    {'text': "{'resume': 'Barry Allen",
-        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.6541866},
-    {'text': "{'resume': 'Alfred Pennyworth",
-        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.59806436},
-    {'text': "{'resume': 'JOHN DOE",
-        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.5951386},
-    {'text': "{'resume': 'Bruce Wayne ",
-        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.57700855},
-    {'text': "{'resume': 'Harvey Dent",
-        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.38489106},
-    {'text': "{'resume': 'Barry Allen",
-        'query': 'Job Description Front End Engineer', 'score': 0.76813436},
-    {'text': "{'resume': 'Bruce Wayne'",
-        'query': 'Job Description Front End Engineer', 'score': 0.60440844},
-    {'text': "{'resume': 'JOHN DOE",
-        'query': 'Job Description Front End Engineer', 'score': 0.56080043},
-    {'text': "{'resume': 'Alfred Pennyworth",
-        'query': 'Job Description Front End Engineer', 'score': 0.5395049},
-    {'text': "{'resume': 'Harvey Dent",
-        'query': 'Job Description Front End Engineer', 'score': 0.3859515},
-    {'text': "{'resume': 'JOHN DOE",
-        'query': 'Job Description Java Developer', 'score': 0.5449441},
-    {'text': "{'resume': 'Alfred Pennyworth",
-        'query': 'Job Description Java Developer', 'score': 0.53476423},
-    {'text': "{'resume': 'Barry Allen",
-        'query': 'Job Description Java Developer', 'score': 0.5313871},
-    {'text': "{'resume': 'Bruce Wayne ",
-        'query': 'Job Description Java Developer', 'score': 0.44446343},
-    {'text': "{'resume': 'Harvey Dent",
-        'query': 'Job Description Java Developer', 'score': 0.3616274}
-]
-
-# Create a DataFrame
-df = pd.DataFrame(data)
-
-# Create different DataFrames based on the query and sort by score
-df1 = df[df['query'] ==
-         'Job Description Product Manager'].sort_values(by='score', ascending=False)
-df2 = df[df['query'] ==
-         'Job Description Senior Full Stack Engineer'].sort_values(by='score', ascending=False)
-df3 = df[df['query'] == 'Job Description Front End Engineer'].sort_values(
-    by='score', ascending=False)
-df4 = df[df['query'] == 'Job Description Java Developer'].sort_values(
-    by='score', ascending=False)
-
-
-def plot_df(df, title):
-    fig = px.bar(df, x='text', y=df['score']*100, title=title)
-    st.plotly_chart(fig)
-
-
-st.markdown("### Bar plots of scores based on similarity to Job Description.")
+avs.add_vertical_space(3)
 
-st.subheader(":blue[Legend]")
-st.text("Alfred Pennyworth :  Product Manager")
-st.text("Barry Allen :  Front End Developer")
-st.text("Harvey Dent :  Machine Learning Engineer")
-st.text("Bruce Wayne :  Fullstack Developer (MERN)")
-st.text("John Doe :  Fullstack Developer (Java)")
+st.title(':blue[Resume Matcher]')
+st.subheader(
+    'Free and Open Source ATS to help your resume pass the screening stage.')
+st.markdown(
+    '⭐ Give Resume Matcher a Star on [GitHub](https://github.com/srbhr/Naive-Resume-Matching/)')
+badge(type="github", name="srbhr/Naive-Resume-Matching")
 
+st.text('For updates follow me on Twitter.')
+badge(type="twitter", name="_srbhr_")
 
-plot_df(df1, 'Job Description Product Manager 10+ Years of Exper')
-plot_df(df2, 'Job Description Senior Full Stack Engineer 5+ Year')
-plot_df(df3, 'Job Description Front End Engineer 2 Years of Expe')
-plot_df(df4, 'Job Description Java Developer 3 Years of Experien')
+st.markdown(
+    'If you like the project and would like to further help in development please consider 👇')
+badge(type="buymeacoffee", name="srbhr")
diff --git a/streamlit_second.py b/streamlit_second.py
new file mode 100644
index 00000000..a55f71c8
--- /dev/null
+++ b/streamlit_second.py
@@ -0,0 +1,354 @@
+import networkx as nx
+from typing import List
+import streamlit as st
+import pandas as pd
+import json
+import plotly.express as px
+import plotly.graph_objects as go
+from scripts.utils.ReadFiles import get_filenames_from_dir
+from streamlit_extras import add_vertical_space as avs
+from annotated_text import annotated_text, parameters
+from streamlit_extras.badges import badge
+import nltk
+nltk.download('punkt')
+
+parameters.SHOW_LABEL_SEPARATOR = False
+parameters.BORDER_RADIUS = 3
+parameters.PADDING = "0.5 0.25rem"
+
+
+def create_star_graph(nodes_and_weights, title):
+    """Draw a star graph in Streamlit: a central 'resume' node joined to each (node, weight) pair."""
+    G = nx.Graph()
+
+    # Hub node; it is always labelled "resume", whatever document the terms come from
+    central_node = "resume"
+    G.add_node(central_node)
+
+    # One spoke per pair; the weight is scaled by 100 before being stored on the edge
+    for node, weight in nodes_and_weights:
+        G.add_node(node)
+        G.add_edge(central_node, node, weight=weight*100)
+
+    # Compute node positions (no seed is passed, so the layout may differ between runs)
+    pos = nx.spring_layout(G)
+
+    # Edge trace: every edge adds (start, end, None) to the x and y coordinate lists
+    edge_x = []
+    edge_y = []
+    for edge in G.edges():
+        x0, y0 = pos[edge[0]]
+        x1, y1 = pos[edge[1]]
+        edge_x.extend([x0, x1, None])
+        edge_y.extend([y0, y1, None])
+
+    edge_trace = go.Scatter(x=edge_x, y=edge_y, line=dict(
+        width=0.5, color='#888'), hoverinfo='none', mode='lines')
+
+    # Node trace: one marker per node at its layout position
+    node_x = []
+    node_y = []
+    for node in G.nodes():
+        x, y = pos[node]
+        node_x.append(x)
+        node_y.append(y)
+
+    node_trace = go.Scatter(x=node_x, y=node_y, mode='markers', hoverinfo='text',
+                            marker=dict(showscale=True, colorscale='Rainbow', reversescale=True, color=[], size=10,
+                                        colorbar=dict(thickness=15, title='Node Connections', xanchor='left',
+                                                      titleside='right'), line_width=2))
+
+    # Colour each marker by its number of neighbours and build the matching hover text
+    node_adjacencies = []
+    node_text = []
+    for node in G.nodes():
+        adjacencies = list(G.adj[node])  # neighbours of this node
+        node_adjacencies.append(len(adjacencies))
+        node_text.append(f'{node}<br># of connections: {len(adjacencies)}')
+
+    node_trace.marker.color = node_adjacencies
+    node_trace.text = node_text
+
+    # Combine both traces into a figure with the grid, zero lines and tick labels hidden
+    fig = go.Figure(data=[edge_trace, node_trace],
+                    layout=go.Layout(title=title, titlefont_size=16, showlegend=False,
+                                     hovermode='closest', margin=dict(b=20, l=5, r=5, t=40),
+                                     xaxis=dict(
+                                         showgrid=False, zeroline=False, showticklabels=False),
+                                     yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
+
+    # Render the figure in the Streamlit page
+    st.plotly_chart(fig)
+
+
+def create_annotated_text(input_string: str, word_list: List[str], annotation: str, color_code: str):
+    """Tokenize *input_string* and tag every token that appears in *word_list*.
+
+    Args:
+        input_string: Text that is split with ``nltk.word_tokenize``.
+        word_list: Words to tag; a token must equal one of them exactly.
+        annotation: Label stored with every tagged token.
+        color_code: Colour stored with every tagged token.
+
+    Returns:
+        A list in token order: tagged tokens become
+        ``(token, annotation, color_code)`` tuples, all others stay strings.
+    """
+    words = nltk.word_tokenize(input_string)
+    # Set membership keeps each per-token check cheap.
+    wanted = set(word_list)
+    return [
+        (word, annotation, color_code) if word in wanted else word
+        for word in words
+    ]
+
+
+def read_json(filename):
+    """Return the parsed JSON content of *filename*, decoded as UTF-8 on every platform."""
+    with open(filename, encoding="utf-8") as f:
+        return json.load(f)
+
+
+def tokenize_string(input_string):
+    """Return the tokens that ``nltk.word_tokenize`` produces for *input_string*."""
+    return nltk.word_tokenize(input_string)
+
+
+st.image('Assets/img/header_image.jpg')
+
+st.title(':blue[Resume Matcher]')
+st.subheader(
+    'Free and Open Source ATS to help your resume pass the screening stage.')
+st.markdown(
+    "Check the website [www.resumematcher.fyi](https://www.resumematcher.fyi/)")
+st.markdown(
+    '⭐ Give Resume Matcher a Star on [GitHub](https://github.com/srbhr/Naive-Resume-Matching/)')
+badge(type="github", name="srbhr/Naive-Resume-Matching")
+
+st.text('For updates follow me on Twitter.')
+badge(type="twitter", name="_srbhr_")
+
+st.markdown(
+    'If you like the project and would like to further help in development please consider 👇')
+badge(type="buymeacoffee", name="srbhr")
+
+avs.add_vertical_space(5)
+
+resume_names = get_filenames_from_dir("Data/Processed/Resumes")
+
+st.write("There are", len(resume_names),
+         " resumes present. Please select one from the menu below:")
+# Clamp the default index so the slider stays valid with fewer than three resumes.
+output = st.slider('Select Resume Number', 0, len(resume_names)-1,
+                   min(2, len(resume_names)-1))
+avs.add_vertical_space(5)
+st.write("You have selected ", resume_names[output], " printing the resume")
+selected_file = read_json("Data/Processed/Resumes/"+resume_names[output])
+
+avs.add_vertical_space(2)
+st.markdown("#### Parsed Resume Data")
+st.caption(
+    "This text is parsed from your resume. This is how it'll look like after getting parsed by an ATS.")
+st.caption("Utilize this to understand how to make your resume ATS friendly.")
+avs.add_vertical_space(3)
+# Show the cleaned resume text (the "clean_data" field of the parsed JSON).
+st.write(selected_file["clean_data"])
+
+avs.add_vertical_space(3)
+st.write("Now let's take a look at the extracted keywords from the resume.")
+
+annotated_text(create_annotated_text(
+    selected_file["clean_data"], selected_file["extracted_keywords"],
+    "KW", "#0B666A"))
+
+avs.add_vertical_space(5)
+st.write("Now let's take a look at the extracted entities from the resume.")
+
+# Star graph with one spoke per (term, score) pair in "keyterms"
+create_star_graph(selected_file['keyterms'], "Entities from Resume")
+
+df2 = pd.DataFrame(selected_file['keyterms'], columns=["keyword", "value"])
+
+# Map each key term to its score scaled by 100 (a repeated term keeps its last score)
+keyword_dict = {}
+for keyword, value in selected_file['keyterms']:
+    keyword_dict[keyword] = value*100
+
+fig = go.Figure(data=[go.Table(header=dict(values=["Keyword", "Value"],
+                                           font=dict(size=12),
+                                           fill_color='#070A52'),
+                               cells=dict(values=[list(keyword_dict.keys()),
+                                                  list(keyword_dict.values())],
+                                          line_color='darkslategray',
+                                          fill_color='#6DA9E4'))
+                      ])
+st.plotly_chart(fig)
+
+st.divider()
+
+fig = px.treemap(df2, path=['keyword'], values='value',
+                 color_continuous_scale='Rainbow',
+                 title='Key Terms/Topics Extracted from your Resume')
+st.write(fig)
+
+avs.add_vertical_space(5)
+
+job_descriptions = get_filenames_from_dir("Data/Processed/JobDescription")
+
+st.write("There are", len(job_descriptions),
+         " job descriptions present. Please select one from the menu below:")
+# Clamp the default index so the slider stays valid with fewer than three files.
+output = st.slider('Select Job Description Number',
+                   0, len(job_descriptions)-1, min(2, len(job_descriptions)-1))
+
+avs.add_vertical_space(5)
+st.write("You have selected ",
+         job_descriptions[output], " printing the job description")
+selected_jd = read_json(
+    "Data/Processed/JobDescription/"+job_descriptions[output])
+
+avs.add_vertical_space(2)
+st.markdown("#### Job Description")
+st.caption(
+    "Currently in the pipeline I'm parsing this from PDF but it'll be from txt or copy paste.")
+avs.add_vertical_space(3)
+# Show the cleaned job-description text (the "clean_data" field of the parsed JSON).
+st.write(selected_jd["clean_data"])
+
+st.markdown("#### Common Words between Job Description and Resumes Highlighted.")
+# The resume text is what gets highlighted, wherever a token equals a job-description keyword.
+annotated_text(create_annotated_text(
+    selected_file["clean_data"], selected_jd["extracted_keywords"],
+    "JD", "#F24C3D"))
+
+st.write("Now let's take a look at the extracted entities from the job description.")
+
+# Star graph of the job description's key terms (its hub node is still labelled "resume")
+create_star_graph(selected_jd['keyterms'], "Entities from Job Description")
+
+df2 = pd.DataFrame(selected_jd['keyterms'], columns=["keyword", "value"])
+
+# Map each key term to its score scaled by 100 (a repeated term keeps its last score)
+keyword_dict = {}
+for keyword, value in selected_jd['keyterms']:
+    keyword_dict[keyword] = value*100
+
+fig = go.Figure(data=[go.Table(header=dict(values=["Keyword", "Value"],
+                                           font=dict(size=12),
+                                           fill_color='#070A52'),
+                               cells=dict(values=[list(keyword_dict.keys()),
+                                                  list(keyword_dict.values())],
+                                          line_color='darkslategray',
+                                          fill_color='#6DA9E4'))
+                      ])
+st.plotly_chart(fig)
+
+st.divider()
+
+fig = px.treemap(df2, path=['keyword'], values='value',
+                 color_continuous_scale='Rainbow',
+                 title='Key Terms/Topics Extracted from the selected Job Description')
+st.write(fig)
+
+avs.add_vertical_space(5)
+
+st.divider()
+
+st.markdown("## Vector Similarity Scores")
+st.caption("Powered by Qdrant Vector Search")
+st.info("These are pre-computed queries", icon="ℹ")
+st.warning(
+    "Running Qdrant or Sentence Transformers without having capacity is not recommended", icon="⚠")
+
+
+# Your data
+data = [
+    {'text': "{'resume': 'Alfred Pennyworth",
+        'query': 'Job Description Product Manager', 'score': 0.62658},
+    {'text': "{'resume': 'Barry Allen",
+        'query': 'Job Description Product Manager', 'score': 0.43777737},
+    {'text': "{'resume': 'Bruce Wayne ",
+        'query': 'Job Description Product Manager', 'score': 0.39835533},
+    {'text': "{'resume': 'JOHN DOE",
+        'query': 'Job Description Product Manager', 'score': 0.3915512},
+    {'text': "{'resume': 'Harvey Dent",
+        'query': 'Job Description Product Manager', 'score': 0.3519544},
+    {'text': "{'resume': 'Barry Allen",
+        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.6541866},
+    {'text': "{'resume': 'Alfred Pennyworth",
+        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.59806436},
+    {'text': "{'resume': 'JOHN DOE",
+        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.5951386},
+    {'text': "{'resume': 'Bruce Wayne ",
+        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.57700855},
+    {'text': "{'resume': 'Harvey Dent",
+        'query': 'Job Description Senior Full Stack Engineer', 'score': 0.38489106},
+    {'text': "{'resume': 'Barry Allen",
+        'query': 'Job Description Front End Engineer', 'score': 0.76813436},
+    {'text': "{'resume': 'Bruce Wayne'",
+        'query': 'Job Description Front End Engineer', 'score': 0.60440844},
+    {'text': "{'resume': 'JOHN DOE",
+        'query': 'Job Description Front End Engineer', 'score': 0.56080043},
+    {'text': "{'resume': 'Alfred Pennyworth",
+        'query': 'Job Description Front End Engineer', 'score': 0.5395049},
+    {'text': "{'resume': 'Harvey Dent",
+        'query': 'Job Description Front End Engineer', 'score': 0.3859515},
+    {'text': "{'resume': 'JOHN DOE",
+        'query': 'Job Description Java Developer', 'score': 0.5449441},
+    {'text': "{'resume': 'Alfred Pennyworth",
+        'query': 'Job Description Java Developer', 'score': 0.53476423},
+    {'text': "{'resume': 'Barry Allen",
+        'query': 'Job Description Java Developer', 'score': 0.5313871},
+    {'text': "{'resume': 'Bruce Wayne ",
+        'query': 'Job Description Java Developer', 'score': 0.44446343},
+    {'text': "{'resume': 'Harvey Dent",
+        'query': 'Job Description Java Developer', 'score': 0.3616274}
+]
+
+# Create a DataFrame
+df = pd.DataFrame(data)
+
+# Create different DataFrames based on the query and sort by score
+df1 = df[df['query'] ==
+         'Job Description Product Manager'].sort_values(by='score', ascending=False)
+df2 = df[df['query'] ==
+         'Job Description Senior Full Stack Engineer'].sort_values(by='score', ascending=False)
+df3 = df[df['query'] == 'Job Description Front End Engineer'].sort_values(
+    by='score', ascending=False)
+df4 = df[df['query'] == 'Job Description Java Developer'].sort_values(
+    by='score', ascending=False)
+
+
+def plot_df(df, title):
+    """Show a Streamlit bar chart of ``df['score'] * 100`` for each ``df['text']`` value."""
+    st.plotly_chart(px.bar(df, x='text', y=df['score'] * 100, title=title))
+
+
+st.markdown("### Bar plots of scores based on similarity to Job Description.")
+
+st.subheader(":blue[Legend]")
+st.text("Alfred Pennyworth :  Product Manager")
+st.text("Barry Allen :  Front End Developer")
+st.text("Harvey Dent :  Machine Learning Engineer")
+st.text("Bruce Wayne :  Fullstack Developer (MERN)")
+st.text("John Doe :  Fullstack Developer (Java)")
+
+
+plot_df(df1, 'Job Description Product Manager 10+ Years of Exper')
+plot_df(df2, 'Job Description Senior Full Stack Engineer 5+ Year')
+plot_df(df3, 'Job Description Front End Engineer 2 Years of Expe')
+plot_df(df4, 'Job Description Java Developer 3 Years of Experien')
+
+
+avs.add_vertical_space(3)
+
+st.markdown(
+    '⭐ Give Resume Matcher a Star on [GitHub](https://github.com/srbhr/Naive-Resume-Matching/)')
+badge(type="github", name="srbhr/Naive-Resume-Matching")
+
+st.text('For updates follow me on Twitter.')
+badge(type="twitter", name="_srbhr_")
+
+st.markdown(
+    'If you like the project and would like to further help in development please consider 👇')
+badge(type="buymeacoffee", name="srbhr")