Skip to content

Commit

Permalink
updated all dependencies
Browse files Browse the repository at this point in the history
  • Loading branch information
assafelovic committed Aug 15, 2024
1 parent 1172535 commit d9433d3
Show file tree
Hide file tree
Showing 18 changed files with 84 additions and 3,981 deletions.
1 change: 1 addition & 0 deletions frontend/styles.css
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,7 @@ footer {

#reportContainer {
font-family: 'Georgia', 'Times New Roman', Times, "Courier New", serif;
font-size: 18px !important;
background-color: rgba(255, 255, 255, 0.1);
font-family: 'Times New Roman', Times, "Courier New", serif;
border: none;
Expand Down
20 changes: 0 additions & 20 deletions gpt_researcher/llm_provider/__init__.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,5 @@
from .google.google import GoogleProvider
from .openai.openai import OpenAIProvider
from .azureopenai.azureopenai import AzureOpenAIProvider
from .groq.groq import GroqProvider
from .ollama.ollama import OllamaProvider
from .together.together import TogetherProvider
from .anthropic.anthropic import AnthropicProvider
from .mistral.mistral import MistralProvider
from .huggingface.huggingface import HuggingFaceProvider
from .unify.unify import UnifyProvider
from .generic import GenericLLMProvider

__all__ = [
"GoogleProvider",
"OpenAIProvider",
"AzureOpenAIProvider",
"OllamaProvider",
"GroqProvider",
"TogetherProvider",
"AnthropicProvider",
"MistralProvider",
"HuggingFaceProvider",
"UnifyProvider",
"GenericLLMProvider",
]
12 changes: 9 additions & 3 deletions gpt_researcher/retrievers/duckduckgo/duckduckgo.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
from itertools import islice
from duckduckgo_search import DDGS
from ..utils import check_pkg


class Duckduckgo:
"""
Duckduckgo API Retriever
"""
def __init__(self, query):
    """
    Initialize the retriever with a search query.

    Args:
        query: The search query string to run against DuckDuckGo.
    """
    # duckduckgo_search is an optional dependency: validate it is installed
    # (check_pkg raises ImportError with an install hint) before importing
    # it lazily here instead of at module import time.
    check_pkg('duckduckgo_search')
    from duckduckgo_search import DDGS
    self.ddg = DDGS()
    self.query = query

Expand All @@ -17,5 +19,9 @@ def search(self, max_results=5):
:param max_results:
:return:
"""
ddgs_gen = self.ddg.text(self.query, region='wt-wt', max_results=max_results)
return ddgs_gen
try:
search_response = self.ddg.text(self.query, region='wt-wt', max_results=max_results)
except Exception as e:
print(f"Error: {e}. Failed fetching sources. Resulting in empty response.")
search_response = []
return search_response
6 changes: 4 additions & 2 deletions gpt_researcher/retrievers/exa/exa.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import os

from exa_py import Exa
from ..utils import check_pkg


class ExaSearch:
Expand All @@ -14,6 +13,9 @@ def __init__(self, query):
Args:
query: The search query.
"""
# This validation is necessary since exa_py is optional
check_pkg("exa_py")
from exa_py import Exa
self.query = query
self.api_key = self._retrieve_api_key()
self.client = Exa(api_key=self.api_key)
Expand Down
2 changes: 0 additions & 2 deletions gpt_researcher/retrievers/google/google.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import os
import requests
import json
from tavily import TavilyClient


class GoogleSearch:
Expand All @@ -21,7 +20,6 @@ def __init__(self, query, headers=None):
self.headers = headers or {}
self.api_key = self.headers.get("google_api_key") or self.get_api_key() # Use the passed api_key or fallback to environment variable
self.cx_key = self.headers.get("google_cx_key") or self.get_cx_key() # Use the passed cx_key or fallback to environment variable
self.client = TavilyClient(self.api_key)

def get_api_key(self):
"""
Expand Down
1 change: 0 additions & 1 deletion gpt_researcher/retrievers/searx/searx.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

# libraries
import os
from tavily import TavilyClient
from langchain_community.utilities import SearxSearchWrapper


Expand Down
11 changes: 5 additions & 6 deletions gpt_researcher/retrievers/serpapi/serpapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
# libraries
import os
import requests
from duckduckgo_search import DDGS
import urllib.parse


Expand Down Expand Up @@ -56,6 +55,7 @@ def search(self, max_results=7):
search_results = response.json()
if search_results:
results = search_results["organic_results"]
results_processed = 0
for result in results:
# skip youtube results
if "youtube.com" in result["link"]:
Expand All @@ -68,10 +68,9 @@ def search(self, max_results=7):
"body": result["snippet"],
}
search_response.append(search_result)
results_processed += 1
except Exception as e: # Fallback in case overload on Tavily Search API
print(f"Error: {e}")
ddg = DDGS()
search_response = ddg.text(self.query, region='wt-wt', max_results=max_results)
results_processed += 1
except Exception as e:
print(f"Error: {e}. Failed fetching sources. Resulting in empty response.")
search_response = []

return search_response
64 changes: 51 additions & 13 deletions gpt_researcher/retrievers/tavily/tavily_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@

# libraries
import os
from tavily import TavilyClient
from duckduckgo_search import DDGS
from typing import Literal, Sequence, Optional
import requests
import json


class TavilySearch():
Expand All @@ -18,9 +19,12 @@ def __init__(self, query, headers=None, topic="general"):
"""
self.query = query
self.headers = headers or {}
self.api_key = self.get_api_key()
self.client = TavilyClient(self.api_key)
self.topic = topic
self.base_url = "https://api.tavily.com/search"
self.api_key = self.get_api_key()
self.headers = {
"Content-Type": "application/json",
}

def get_api_key(self):
"""
Expand All @@ -36,6 +40,45 @@ def get_api_key(self):
raise Exception("Tavily API key not found. Please set the TAVILY_API_KEY environment variable.")
return api_key

def _search(self,
            query: str,
            search_depth: Literal["basic", "advanced"] = "basic",
            topic: str = "general",
            days: int = 2,
            max_results: int = 5,
            include_domains: Optional[Sequence[str]] = None,
            exclude_domains: Optional[Sequence[str]] = None,
            include_answer: bool = False,
            include_raw_content: bool = False,
            include_images: bool = False,
            use_cache: bool = True,
            ) -> dict:
    """
    Internal search method to send the request to the API.

    Args:
        query: The search query string.
        search_depth: "basic" or "advanced" search mode.
        topic: Topic category forwarded to the API.
        days: Recency window in days forwarded to the API.
        max_results: Maximum number of results to request.
        include_domains: Domains to restrict results to, or None.
        exclude_domains: Domains to exclude from results, or None.
        include_answer: Whether to request a synthesized answer.
        include_raw_content: Whether to request raw page content.
        include_images: Whether to request image results.
        use_cache: Whether the API may serve cached results.

    Returns:
        The parsed JSON response as a dict.

    Raises:
        requests.HTTPError: If the API responds with a 4xx/5xx status.
        Exception: If the API responds with any other non-200 status.
    """
    data = {
        "query": query,
        "search_depth": search_depth,
        "topic": topic,
        "days": days,
        "include_answer": include_answer,
        "include_raw_content": include_raw_content,
        "max_results": max_results,
        "include_domains": include_domains,
        "exclude_domains": exclude_domains,
        "include_images": include_images,
        "api_key": self.api_key,
        "use_cache": use_cache,
    }

    response = requests.post(self.base_url, data=json.dumps(data), headers=self.headers, timeout=100)

    if response.status_code == 200:
        return response.json()
    # Raise for 4xx/5xx. Previously, a non-200 status that was not an
    # error (e.g. 204 or an unfollowed redirect) fell through
    # raise_for_status() and this method implicitly returned None
    # despite the `-> dict` contract, crashing callers; raise instead.
    response.raise_for_status()
    raise Exception(
        f"Tavily API returned unexpected status code {response.status_code}."
    )

def search(self, max_results=7):
"""
Searches the query
Expand All @@ -44,18 +87,13 @@ def search(self, max_results=7):
"""
try:
# Search the query
results = self.client.search(self.query, search_depth="basic", max_results=max_results, topic=self.topic)
results = self._search(self.query, search_depth="basic", max_results=max_results, topic=self.topic)
sources = results.get("results", [])
if not sources:
raise Exception("No results found with Tavily API search.")
# Return the results
search_response = [{"href": obj["url"], "body": obj["content"]} for obj in sources]
except Exception as e: # Fallback in case overload on Tavily Search API
print(f"Error: {e}. Fallback to DuckDuckGo Search API...")
try:
ddg = DDGS()
search_response = ddg.text(self.query, region='wt-wt', max_results=max_results)
except Exception as e:
print(f"Error: {e}. Failed fetching sources. Resulting in empty response.")
search_response = []
except Exception as e:
print(f"Error: {e}. Failed fetching sources. Resulting in empty response.")
search_response = []
return search_response
10 changes: 10 additions & 0 deletions gpt_researcher/retrievers/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import importlib.util


def check_pkg(pkg: str) -> None:
    """
    Verify that *pkg* is importable in the current environment.

    Args:
        pkg: Importable module name (underscored form, e.g. "exa_py").

    Raises:
        ImportError: If the package is not installed, with a pip install
            hint using the kebab-cased distribution name.
    """
    spec = importlib.util.find_spec(pkg)
    if spec is not None:
        return
    dist_name = pkg.replace("_", "-")
    raise ImportError(
        f"Unable to import {dist_name}. Please install with "
        f"`pip install -U {dist_name}`"
    )
2 changes: 0 additions & 2 deletions gpt_researcher/scraper/__init__.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@

from .beautiful_soup.beautiful_soup import BeautifulSoupScraper
from .newspaper.newspaper import NewspaperScraper
from .web_base_loader.web_base_loader import WebBaseLoaderScraper
from .arxiv.arxiv import ArxivScraper
from .pymupdf.pymupdf import PyMuPDFScraper

__all__ = [
"BeautifulSoupScraper",
"NewspaperScraper",
"WebBaseLoaderScraper",
"ArxivScraper",
"PyMuPDFScraper"
Expand Down
Empty file.
42 changes: 0 additions & 42 deletions gpt_researcher/scraper/newspaper/newspaper.py

This file was deleted.

2 changes: 0 additions & 2 deletions gpt_researcher/scraper/scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from gpt_researcher.scraper import (
ArxivScraper,
BeautifulSoupScraper,
NewspaperScraper,
PyMuPDFScraper,
WebBaseLoaderScraper,
)
Expand Down Expand Up @@ -74,7 +73,6 @@ def get_scraper(self, link):
SCRAPER_CLASSES = {
"pdf": PyMuPDFScraper,
"arxiv": ArxivScraper,
"newspaper": NewspaperScraper,
"bs": BeautifulSoupScraper,
"web_base_loader": WebBaseLoaderScraper,
}
Expand Down
39 changes: 2 additions & 37 deletions gpt_researcher/utils/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,43 +15,8 @@


def get_llm(llm_provider, **kwargs):
match llm_provider:
case "openai":
from ..llm_provider import OpenAIProvider
llm_provider = OpenAIProvider
case "azureopenai":
from ..llm_provider import AzureOpenAIProvider
llm_provider = AzureOpenAIProvider
case "google":
from ..llm_provider import GoogleProvider
llm_provider = GoogleProvider
case "ollama":
from ..llm_provider import OllamaProvider
llm_provider = OllamaProvider
case "groq":
from ..llm_provider import GroqProvider
llm_provider = GroqProvider
case "together":
from ..llm_provider import TogetherProvider
llm_provider = TogetherProvider
case "huggingface":
from ..llm_provider import HuggingFaceProvider
llm_provider = HuggingFaceProvider
case "mistral":
from ..llm_provider import MistralProvider
llm_provider = MistralProvider
case "anthropic":
from ..llm_provider import AnthropicProvider
llm_provider = AnthropicProvider
case "unify":
from ..llm_provider import UnifyProvider
llm_provider = UnifyProvider
# Generic case for all other providers supported by Langchain
case _:
from gpt_researcher.llm_provider import GenericLLMProvider
return GenericLLMProvider.from_provider(llm_provider, **kwargs)

return llm_provider(**kwargs)
from gpt_researcher.llm_provider import GenericLLMProvider
return GenericLLMProvider.from_provider(llm_provider, **kwargs)


async def create_chat_completion(
Expand Down
1 change: 0 additions & 1 deletion multi_agents/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
langgraph
gpt_researcher
langchain-community
langgraph-cli
python-dotenv
weasyprint
Expand Down
Loading

2 comments on commit d9433d3

@alarichartsock
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why was duckduckgo search removed?

@assafelovic
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@alarichartsock Why removed? It's still supported — you just need to install it manually with pip. We removed the bundled dependencies to make GPT Researcher more lightweight. Check out the details here: https://docs.gptr.dev/docs/gpt-researcher/search-engines/retrievers

Please sign in to comment.