diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..edde400
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+model/
+__pycache__/
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..90a1a97
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,16 @@
+FROM python:3.11.9-slim-bookworm
+
+WORKDIR /app
+ADD . /app
+
+RUN apt update && apt install -y gcc g++
+
+RUN python3 -m pip cache purge
+RUN python3 -m pip install --no-cache-dir -r requirements.txt
+RUN python3 -m nltk.downloader "punkt"
+RUN python3 -m nltk.downloader "stopwords"
+
+EXPOSE 8501
+
+ENTRYPOINT [ "streamlit", "run" ]
+CMD [ "app.py" ]
\ No newline at end of file
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..b0796db
--- /dev/null
+++ b/app.py
@@ -0,0 +1,27 @@
+import streamlit as st
+from websearching import web_search
+from llama_cpp_inf import run_inference_lcpp
+
+def reply(query, num_results):
+    jsonstr = web_search(query, num_results)
+    results = run_inference_lcpp(jsonstr, query)
+    return results
+
+st.set_page_config(page_title="SearchPhi", page_icon="🔎")
+# Title of the web app
+st.title("SearchPhi🔎")
+st.subheader("With llama.cpp!🦙")
+# Input text box for the search query
+query = st.text_input("Enter search term:")
+
+# Number of results to display
+num_results = st.number_input("Number of results to display:", min_value=1, max_value=5, value=3)
+
+# Button to initiate search
+if st.button("Search"):
+    if query:
+        results = reply(query, num_results)
+        st.write(f"**Results for '{query}':**")
+        st.write_stream(results)
+    else:
+        st.write("Please enter a search term.")
\ No newline at end of file
diff --git a/llama_cpp_inf.py b/llama_cpp_inf.py
new file mode 100644
index 0000000..ef7ed9b
--- /dev/null
+++ b/llama_cpp_inf.py
@@ -0,0 +1,43 @@
+## Imports
+from llama_cpp import Llama
+import json
+import re
+
+## Instantiate model from downloaded file
+llm = Llama(
+    model_path="model/Phi-3-mini-4k-instruct-q4.gguf",
+    n_ctx=4096,      # Context length to use
+    n_threads=14,    # Number of CPU threads to use
+    n_gpu_layers=3   # Number of model layers to offload to GPU
+)
+
+## Generation kwargs
+generation_kwargs = {
+    "max_tokens": 1024,
+    "stop": ["<|end|>"],
+    "echo": False,  # Echo the prompt in the output
+    "top_k": 1      # This is essentially greedy decoding, since the model will always return the highest-probability token. Set this value > 1 for sampling decoding
+}
+
+def run_inference_lcpp(jsonstr, user_search):
+    prompt = f"""Instructions for the assistant: Starting from the URLs and the keywords derived from Google search results and provided to you in JSON format, generate a meaningful summary of the search results that satisfies the user's query.
+    URLs and keywords in JSON format: {jsonstr}.
+    User's query to satisfy: {user_search}"""
+    res = llm(prompt, **generation_kwargs)
+    response = res["choices"][0]["text"]
+    jsondict = json.loads(jsonstr)
+    addon = "Reference websites:\n- " + '\n- '.join(list(jsondict.keys()))
+    input_string = response.replace("<|assistant|>", "") + "\n\n" + addon
+    frag_res = re.findall(r'\w+|\s+|[^\w\s]', input_string)
+    for word in frag_res:
+        yield word
+
+if __name__ == "__main__":
+    prompt = """Context: A vector database, vector store or vector search engine is a database that can store vectors (fixed-length lists of numbers) along with other data items. Vector databases typically implement one or more Approximate Nearest Neighbor (ANN) algorithms,[1][2] so that one can search the database with a query vector to retrieve the closest matching database records.
+
+    Vectors are mathematical representations of data in a high-dimensional space. In this space, each dimension corresponds to a feature of the data, with the number of dimensions ranging from a few hundred to tens of thousands, depending on the complexity of the data being represented. A vector's position in this space represents its characteristics. Words, phrases, or entire documents, as well as images, audio, and other types of data, can all be vectorized; Prompt: Describe what is a vector database"""
+    res = llm(prompt, **generation_kwargs) # Res is a dictionary
+
+    ## Unpack the generated text from the LLM response dictionary and print it
+    print(res["choices"][0]["text"])
+    # res is short for result
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..98c0a3a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+llama_cpp_python==0.2.83
+streamlit==1.37.0
+googlesearch-python==1.2.4
+nltk==3.8.1
+rake_nltk==1.0.6
+boilerpy3==1.0.7
diff --git a/websearching.py b/websearching.py
new file mode 100644
index 0000000..4107fa0
--- /dev/null
+++ b/websearching.py
@@ -0,0 +1,30 @@
+from googlesearch import search
+from rake_nltk import Rake
+from boilerpy3 import extractors
+import json
+
+extractor = extractors.ArticleExtractor()
+r = Rake()
+
+# Function to perform web search
+def web_search(query, num_results=5):
+    search_results = []
+    for url in search(query, num_results=num_results):
+        search_results.append(url)
+    urls = list(set(search_results))
+    jsonlike = {}
+    for url in urls:
+        try:
+            content = extractor.get_content_from_url(url)
+            r.extract_keywords_from_text(content)
+            keywords = r.get_ranked_phrases()[:20]
+            jsonlike.update({url: {"keywords": keywords}})
+        except Exception:
+            continue
+    jsonstr = json.dumps(jsonlike)
+    return jsonstr
+
+
+
+
+
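
Note (not part of the diff): a minimal smoke-test sketch of how the two new modules compose, assuming the Phi-3 GGUF file is already present under model/ (the directory is git-ignored above) and that importing llama_cpp_inf locally is acceptable, since the module loads the model at import time. The query string below is a hypothetical example.

import json

from websearching import web_search
from llama_cpp_inf import run_inference_lcpp

if __name__ == "__main__":
    query = "what is a vector database"  # hypothetical example query
    # web_search returns a JSON string shaped like:
    # {"https://example.com/page": {"keywords": ["phrase one", "phrase two"]}, ...}
    jsonstr = web_search(query, num_results=3)
    print(json.dumps(json.loads(jsonstr), indent=2))

    # run_inference_lcpp yields word/whitespace/punctuation fragments,
    # which is why app.py renders it with st.write_stream; joining the
    # fragments reconstructs the full summary plus the reference list.
    summary = "".join(run_inference_lcpp(jsonstr, query))
    print(summary)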