
Commit 967deb9
Merge pull request #1085 from ElishaKay/polishing
docs and docker fixes
assafelovic authored Jan 25, 2025
2 parents 1bb7acf + aa3c9cf commit 967deb9
Showing 21 changed files with 193 additions and 79 deletions.
8 changes: 6 additions & 2 deletions Dockerfile
@@ -31,8 +31,12 @@ FROM gpt-researcher-install AS gpt-researcher

# Create a non-root user for security
RUN useradd -ms /bin/bash gpt-researcher && \
-    chown -R gpt-researcher:gpt-researcher /usr/src/app
+    chown -R gpt-researcher:gpt-researcher /usr/src/app && \
+    # Add these lines to create and set permissions for outputs directory
+    mkdir -p /usr/src/app/outputs && \
+    chown -R gpt-researcher:gpt-researcher /usr/src/app/outputs && \
+    chmod 777 /usr/src/app/outputs

USER gpt-researcher
WORKDIR /usr/src/app

6 changes: 5 additions & 1 deletion docker-compose.yml
@@ -9,17 +9,21 @@ services:
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
LOGGING_LEVEL: INFO
volumes:
-      - ./outputs:/usr/src/app/outputs
+      - ${PWD}/outputs:/usr/src/app/outputs:rw
+      - ${PWD}/logs:/usr/src/app/logs:rw
+    user: root
restart: always
ports:
- 8000:8000

gptr-nextjs:
pull_policy: build
image: gptresearcher/gptr-nextjs
stdin_open: true
environment:
CHOKIDAR_USEPOLLING: true
+      LOGGING_LEVEL: INFO
+      NEXT_PUBLIC_GA_MEASUREMENT_ID: ${NEXT_PUBLIC_GA_MEASUREMENT_ID}
build:
dockerfile: Dockerfile.dev
context: frontend/nextjs
8 changes: 4 additions & 4 deletions docs/docs/gpt-researcher/context/data-ingestion.md
@@ -37,11 +37,11 @@ PGVECTOR_CONNECTION_STRING=postgresql://username:password...
```

Below is a custom data ingestion process that you can use to ingest your data into a Langchain VectorStore. See a [full working example here](https://github.com/assafelovic/gpt-researcher/pull/819#issue-2501632831).
-In this example, we're using a Postgres VectorStore to embed data of a Github Branch, but you can use [any supported Langchain VectorStore](https://python.langchain.com/v0.2/docs/integrations/vectorstores/).hasattr
+In this example, we're using a Postgres VectorStore to embed data of a Github Branch, but you can use [any supported Langchain VectorStore](https://python.langchain.com/v0.2/docs/integrations/vectorstores/).

Note that when you create the Langchain Documents, you should include as metadata the `source` and `title` fields in order for GPTR to leverage your Documents seamlessly. In the example below, we're splitting the documents list into chunks of 100 & then inserting 1 chunk at a time into the vector store.
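For instance, here is a minimal sketch of such a Document (illustrative only, not part of this diff; the URL and title are placeholders):

```python
from langchain_core.documents import Document

doc = Document(
    page_content="The text content of one file or page",
    metadata={
        "source": "https://github.com/owner/repo/blob/master/README.md",  # placeholder URL
        "title": "README.md",
    },
)
```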

-## Step 1: Transform your content into Langchain Documents
+### Step 1: Transform your content into Langchain Documents

```python
from langchain_core.documents import Document
@@ -90,7 +90,7 @@ async def transform_to_langchain_docs(self, directory_structure):
await save_to_vector_store(documents)
```

-## Step 2: Insert your Langchain Documents into a Langchain VectorStore
+### Step 2: Insert your Langchain Documents into a Langchain VectorStore

```python
from langchain_postgres import PGVector
@@ -124,7 +124,7 @@ async def save_to_vector_store(self, documents):
vector_store.add_documents(chunk, ids=[doc.metadata["id"] for doc in chunk])
```

-## Step 3: Pass your Langchain Vectorstore into your GPTR report
+### Step 3: Pass your Langchain Vectorstore into your GPTR report

```python
async_connection_string = pgvector_connection_string.replace("postgresql://", "postgresql+psycopg://")
# ... (remaining lines collapsed in this diff)
```
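To illustrate where this step is headed (a hedged sketch, not part of this commit): assuming the `GPTResearcher` constructor accepts a `vector_store` argument, as this step's title suggests, the store built in Step 2 can be passed in like so. The query is illustrative; run this inside an async context.

```python
from gpt_researcher import GPTResearcher

researcher = GPTResearcher(
    query="What does this codebase do?",   # illustrative query
    report_type="research_report",
    vector_store=vector_store,             # the PGVector store built in Step 2
)
await researcher.conduct_research()        # gathers context from the vector store
report = await researcher.write_report()
```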
42 changes: 22 additions & 20 deletions docs/docs/gpt-researcher/frontend/discord-bot.md
@@ -1,35 +1,37 @@
-## Discord Bot
+# Discord Bot

+## Intro
+
+You can either leverage the official GPTR Discord bot or create your own custom bot.
+
+To add the official GPTR Discord bot, simply [click here to invite GPTR to your Discord server](https://discord.com/oauth2/authorize?client_id=1281438963034361856&permissions=1689934339898432&integration_type=0&scope=bot).


## To create your own discord bot with GPTR functionality

Add a .env file in the root of the project and add the following:

```
DISCORD_BOT_TOKEN=
DISCORD_CLIENT_ID=
```
-You can fetch the token from the Discord Developer Portal.
-
-Go to: https://discord.com/developers/applications/
-
-Click the "New Application" button and give your bot a name.
-
-Within the Oath2 tab, you can generate a URL to invite your bot to your server.
-First Select the "bot" scope.
-<img src="./img/oath2-url-generator.png"></img>
-
-Next, give your bot the proper permissions.
-
-<img src="./img/bot-permissions.png"></img>
-
-Finally you can invite your bot via the generated invite URL. In the case of the gptr-bot, here is the invite URL to open in your browser:
-
-https://discord.com/oauth2/authorize?client_id=1281438963034361856&permissions=1689934339898432&integration_type=0&scope=bot
-
-<br></br>
-If you created your own custom bot, copy-paste the token into your .env file you created above.
+You can fetch the token from the Discord Developer Portal by following these steps:
+
+1. Go to https://discord.com/developers/applications/
+2. Click the "New Application" button and give your bot a name
+3. Navigate to the OAuth2 tab to generate an invite URL for your bot
+4. Under "Scopes", select "bot"
+
+![OAuth2 URL Generator](./img/oath2-url-generator.png)
+
+5. Select the appropriate bot permissions
+
+![Bot Permissions](./img/bot-permissions.png)
+
+6. Copy your bot's token and paste it into the `.env` file you created earlier

-## Deploying the bot commands
+### Deploying the bot commands

```bash
node deploy-commands.js
```

@@ -38,13 +40,13 @@ node deploy-commands.js
In our case, this will make the "ask" and "ping" commands available to users of the bot.


-## Running the bot via Docker
+### Running the bot via Docker

```bash
docker compose --profile discord run --rm discord-bot
```

-## Running the bot via CLI
+### Running the bot via CLI

```bash
# install dependencies
# ... (remaining lines collapsed in this diff)
```
14 changes: 11 additions & 3 deletions docs/docs/gpt-researcher/frontend/nextjs-frontend.md
@@ -2,7 +2,10 @@

This frontend project aims to enhance the user experience of GPT Researcher, providing an intuitive and efficient interface for automated research. It offers two deployment options to suit different needs and environments.

-View a Product Tutorial here: [GPT-Researcher Frontend Tutorial](https://www.youtube.com/watch?v=hIZqA6lPusk)
+#### Demo
+<iframe height="400" width="700" src="https://github.com/user-attachments/assets/092e9e71-7e27-475d-8c4f-9dddd28934a3" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>
+
+View an in-depth Product Tutorial here: [GPT-Researcher Frontend Tutorial](https://www.youtube.com/watch?v=hIZqA6lPusk)


## NextJS Frontend App
@@ -74,6 +77,11 @@ A more robust solution with enhanced features and performance.

Note: Requires backend server on `localhost:8000` as detailed in option 1.

+#### Demo
+<iframe height="400" width="700" src="https://github.com/user-attachments/assets/092e9e71-7e27-475d-8c4f-9dddd28934a3" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>
+
+### Adding Google Analytics
+
+To add Google Analytics to your NextJS frontend, simply add the following to your `.env` file:
+
+```
+NEXT_PUBLIC_GA_MEASUREMENT_ID="G-G2YVXKHJNZ"
+```
8 changes: 4 additions & 4 deletions docs/docs/gpt-researcher/frontend/vanilla-js-frontend.md
@@ -1,6 +1,9 @@
# Vanilla JS Frontend

-A lightweight solution using FastAPI to serve static files.
+The VanillaJS frontend is a lightweight solution leveraging FastAPI to serve static files.
+
+### Demo
+<iframe height="400" width="700" src="https://github.com/assafelovic/gpt-researcher/assets/13554167/dd6cf08f-b31e-40c6-9907-1915f52a7110" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>

#### Prerequisites
- Python 3.11+
@@ -19,6 +22,3 @@ A lightweight solution using FastAPI to serve static files.
```

3. Access at `http://localhost:8000`

-#### Demo
-<iframe height="400" width="700" src="https://github.com/assafelovic/gpt-researcher/assets/13554167/dd6cf08f-b31e-40c6-9907-1915f52a7110" frameborder="0" allow="autoplay; encrypted-media" allowfullscreen></iframe>
docs/docs/gpt-researcher/frontend/debugging-websockets.md → docs/docs/gpt-researcher/frontend/visualizing-websockets.md
@@ -1,4 +1,4 @@
-# Debugging Websockets
+# Visualizing Websockets

The GPTR Frontend is powered by Websockets streaming back from the Backend. This allows for real-time updates on the status of your research tasks, as well as the ability to interact with the Backend directly from the Frontend.
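As an aside (not part of this commit), here is a minimal Python client sketch for watching that stream outside the browser. The endpoint URL and the "start" message schema are assumptions; confirm both in your browser's Network tab as described below.

```python
import asyncio
import json

import websockets  # pip install websockets


async def main():
    async with websockets.connect("ws://localhost:8000/ws") as ws:
        # Hypothetical start message; check the frontend source for the real schema.
        await ws.send("start " + json.dumps({
            "task": "example research query",
            "report_type": "research_report",
        }))
        async for message in ws:
            print(message)  # each message is one streamed log/report event


asyncio.run(main())
```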

@@ -12,7 +12,7 @@ Here's how:
![image](https://github.com/user-attachments/assets/15fcb5a4-77ea-4b3b-87d7-55d4b6f80095)


-### Am I polling the right URL?
+## Am I polling the right URL?

If you're concerned that your frontend isn't hitting the right API Endpoint, you can check the URL in the Network Tab.

25 changes: 0 additions & 25 deletions docs/docs/gpt-researcher/gptr/config.md
@@ -53,28 +53,3 @@ To learn more about additional LLM support you can check out the docs [here](/do

You can also include your own external JSON file `config.json` by adding the path in the `config_file` param.

-## Example: Azure OpenAI Configuration
-
-If you are not using OpenAI's models, but other model providers, besides the general configuration above, also additional environment variables are required.
-Check the [langchain documentation](https://python.langchain.com/v0.2/docs/integrations/platforms/) about your model for the exact configuration of the API keys and endpoints.
-
-Here is an example for [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models) configuration:
-
-```bash
-
-OPENAI_API_VERSION="2024-05-01-preview" # or whatever you are using
-AZURE_OPENAI_ENDPOINT="https://CHANGEMEN.openai.azure.com/" # change to the name of your deployment
-AZURE_OPENAI_API_KEY="[Your Key]" # change to your API key
-
-EMBEDDING="azure_openai:text-embedding-ada-002" # change to the deployment of your embedding model
-
-FAST_LLM="azure_openai:gpt-4o-mini" # change to the name of your deployment (not model-name)
-FAST_TOKEN_LIMIT=4000
-
-SMART_LLM="azure_openai:gpt-4o" # change to the name of your deployment (not model-name)
-SMART_TOKEN_LIMIT=4000
-
-RETRIEVER="bing" # if you are using Bing as your search engine (which is likely if you use Azure)
-BING_API_KEY="[Your Key]"
-
-```
docs/docs/gpt-researcher/gptr/logs.md → docs/docs/gpt-researcher/handling-logs/all-about-logs.md
@@ -1,4 +1,4 @@
-# Log Files
+# All About Logs

This document explains how to interpret the log files generated for each report. These logs provide a detailed record of the research process, from initial task planning to the gathering of information, and finally, the report writing process. Reports may change over time as new features are developed.

28 changes: 28 additions & 0 deletions docs/docs/gpt-researcher/handling-logs/langsmith-logs.md
@@ -0,0 +1,28 @@
# Langsmith Logs

With the help of Langsmith, you can easily visualize logs on cost and errors within your Langsmith Dashboard, calculated per LLM call or grouped by project.

Here are the steps to set up Langsmith:

Step 1: Set up a Langsmith account at: [smith.langchain.com](https://smith.langchain.com)

Step 2: Create a new API key at: [smith.langchain.com/settings](https://smith.langchain.com/settings)

Step 3: Add these 2 environment variables:

```bash
LANGCHAIN_TRACING_V2=true
LANGCHAIN_API_KEY=<your-api-key>
```
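If you prefer to set these in code, here is a minimal sketch (the project name is illustrative; `LANGCHAIN_PROJECT` is optional and groups runs by project):

```python
import os

# With these set, any LangChain-powered run (including GPT Researcher) is traced.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_API_KEY"] = "<your-api-key>"
os.environ["LANGCHAIN_PROJECT"] = "gpt-researcher"  # optional: group runs by project
```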

Here's what this looks like in the Langsmith Dashboard:

![Langsmith Dashboard](./langsmith.png)

This can be helpful for:

- Enabling users to visualize and inspect the backend data flow
- Quality assurance debugging: where could the input or output of our AI flows use improvement?
- Cost analysis: where are we spending the most on LLM calls?
- Error analysis: where are we getting the most errors?
- Optimizing speed: which parts of the flow are taking the most time?
Binary file added: docs/docs/gpt-researcher/handling-logs/langsmith.png (not displayable in this view)
docs/docs/gpt-researcher/gptr/handling-logs-as-they-stream.md → docs/docs/gpt-researcher/handling-logs/simple-logs-example.md
@@ -1,4 +1,4 @@
-# Handling Logs
+# Simple Logs Example

Here is a snippet of code to help you handle the streaming logs of your Research tasks.

24 changes: 24 additions & 0 deletions docs/docs/gpt-researcher/llms/running-with-azure.md
@@ -0,0 +1,24 @@
# Example: Azure OpenAI Configuration

If you are using a model provider other than OpenAI, additional environment variables are required on top of the general GPTR configuration. Check the [langchain documentation](https://python.langchain.com/v0.2/docs/integrations/platforms/) for the exact API key and endpoint configuration of your model.

Here is an example for [Azure OpenAI](https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models) configuration:

```bash
OPENAI_API_VERSION="2024-05-01-preview" # or whatever you are using
AZURE_OPENAI_ENDPOINT="https://CHANGEME.openai.azure.com/" # change CHANGEME to your Azure resource name
AZURE_OPENAI_API_KEY="[Your Key]" # change to your API key

EMBEDDING="azure_openai:text-embedding-ada-002" # change to the deployment of your embedding model

FAST_LLM="azure_openai:gpt-4o-mini" # change to the name of your deployment (not model-name)
FAST_TOKEN_LIMIT=4000

SMART_LLM="azure_openai:gpt-4o" # change to the name of your deployment (not model-name)
SMART_TOKEN_LIMIT=4000

RETRIEVER="bing" # if you are using Bing as your search engine (which is likely if you use Azure)
BING_API_KEY="[Your Key]"
```

For more details on what each variable does, you can check out the [GPTR Config Docs](https://docs.gptr.dev/docs/gpt-researcher/gptr/config)
25 changes: 25 additions & 0 deletions docs/docs/gpt-researcher/llms/supported-llms.md
@@ -0,0 +1,25 @@
# Supported LLMs

The following LLMs are supported by GPTR. Please note that you'll need to install the relevant langchain package for each LLM; see the configuration sketch at the end of this page.

- openai
- anthropic
- azure_openai
- cohere
- google_vertexai
- google_genai
- fireworks
- ollama
- together
- mistralai
- huggingface
- groq
- bedrock
- dashscope
- xai
- deepseek
- litellm

The GPTR LLM Module is built on top of the [Langchain LLM Module](https://python.langchain.com/v0.2/docs/integrations/llms/).

If you'd like to add a new LLM into GPTR, you can start with the [langchain documentation](https://python.langchain.com/v0.2/docs/integrations/platforms/) and then look into integrating it into the GPTR LLM Module.
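As an illustration (not part of this commit), selecting a provider generally follows the same `provider:model` convention shown in the Azure example above. The model names below are assumptions; substitute your own:

```python
import os

# A minimal sketch: point GPTR at Anthropic models via the provider:model format.
# Requires the matching langchain package (here, langchain-anthropic) and an API key.
os.environ["ANTHROPIC_API_KEY"] = "<your-api-key>"
os.environ["SMART_LLM"] = "anthropic:claude-3-5-sonnet-20240620"  # illustrative model name
os.environ["FAST_LLM"] = "anthropic:claude-3-5-haiku-20241022"    # illustrative model name
```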
23 changes: 17 additions & 6 deletions docs/sidebars.js
@@ -36,11 +36,9 @@
'gpt-researcher/gptr/example',
'gpt-researcher/gptr/config',
'gpt-researcher/gptr/scraping',
-        'gpt-researcher/gptr/handling-logs-as-they-stream',
'gpt-researcher/gptr/querying-the-backend',
'gpt-researcher/gptr/automated-tests',
-        'gpt-researcher/gptr/troubleshooting',
-        'gpt-researcher/gptr/logs'
+        'gpt-researcher/gptr/troubleshooting'
],
},
{
@@ -53,7 +51,7 @@
'gpt-researcher/frontend/nextjs-frontend',
'gpt-researcher/frontend/vanilla-js-frontend',
'gpt-researcher/frontend/discord-bot',
-        'gpt-researcher/frontend/debugging-websockets'
+        'gpt-researcher/frontend/visualizing-websockets'
],
},
{
@@ -69,15 +67,28 @@
'gpt-researcher/context/data-ingestion'
]
},
+    {
+      type: 'category',
+      label: 'Handling Logs',
+      collapsible: true,
+      collapsed: true,
+      items: [
+        'gpt-researcher/handling-logs/all-about-logs',
+        'gpt-researcher/handling-logs/simple-logs-example',
+        'gpt-researcher/handling-logs/langsmith-logs'
+      ]
+    },
{
type: 'category',
label: 'LLM Providers',
collapsible: true,
collapsed: true,
items: [
'gpt-researcher/llms/llms',
-        'gpt-researcher/llms/running-with-ollama',
-        'gpt-researcher/llms/testing-your-llm'
+        'gpt-researcher/llms/supported-llms',
+        'gpt-researcher/llms/testing-your-llm',
+        'gpt-researcher/llms/running-with-azure',
+        'gpt-researcher/llms/running-with-ollama'
]
},
{
3 changes: 3 additions & 0 deletions frontend/nextjs/app/layout.tsx
@@ -1,6 +1,7 @@
import type { Metadata } from "next";
import { Lexend } from "next/font/google";
import PlausibleProvider from "next-plausible";
+import { GoogleAnalytics } from '@next/third-parties/google'
import "./globals.css";

const inter = Lexend({ subsets: ["latin"] });
@@ -41,10 +42,12 @@ export default function RootLayout({
}: Readonly<{
children: React.ReactNode;
}>) {

return (
<html lang="en">
<head>
<PlausibleProvider domain="localhost:3000" />
+        <GoogleAnalytics gaId={process.env.NEXT_PUBLIC_GA_MEASUREMENT_ID!} />
</head>
<body
className={`${inter.className} flex min-h-screen flex-col justify-between`}