
feat: sso login, demo mode & new mindmap support #644

Merged
merged 62 commits on Feb 2, 2025
b2767f9
fix: update .env.example
taprosoft Dec 24, 2024
0603a92
feat: add SSO login
taprosoft Dec 23, 2024
d6abb02
fix: update flowsetting
taprosoft Dec 23, 2024
2d4c847
fix: add requirement
taprosoft Dec 23, 2024
7ad3209
fix: refine UI
taprosoft Dec 24, 2024
a1dbc4e
fix: update group id-based operation
taprosoft Dec 24, 2024
de3cb6c
fix: improve citation logics
taprosoft Dec 24, 2024
9d9f083
fix: UI enhancement
taprosoft Dec 24, 2024
2215843
fix: user_id to string in models
taprosoft Dec 24, 2024
45a0afd
fix: improve chat suggestion UI and flow
taprosoft Dec 24, 2024
ae6144b
fix: improve group id handling
taprosoft Dec 24, 2024
b7f8931
fix: improve chat suggestion
taprosoft Dec 24, 2024
e075b08
fix: secure download for single file
taprosoft Dec 24, 2024
698b3dc
fix: file limiting in docstore
taprosoft Dec 25, 2024
aed2a08
fix: improve chat suggestion logics & language conform
taprosoft Dec 25, 2024
f0279eb
feat: add markmap and select text to highlight function
taprosoft Dec 30, 2024
6da40bc
fix: update Dockerfile
taprosoft Dec 30, 2024
d2c7f5d
fix: user id auto generate
taprosoft Dec 30, 2024
788fdbc
fix: default user id
taprosoft Dec 30, 2024
b575a9f
feat: add demo mode
taprosoft Dec 31, 2024
93ee71f
fix: update flowsetting
taprosoft Dec 31, 2024
a2a5b67
fix: revise default params for demo
taprosoft Dec 31, 2024
5ed2b78
feat: sso_app alternative
taprosoft Dec 31, 2024
fe42443
feat: sso login demo
taprosoft Dec 31, 2024
45f6b3a
feat: demo specific customization
taprosoft Dec 31, 2024
a002fde
feat: add login using API key
taprosoft Jan 2, 2025
ef9c219
fix: disable key-based login
taprosoft Jan 2, 2025
6ba6fba
fix: optimize duplicate upload
taprosoft Jan 2, 2025
4dfacc6
fix: gradio routing
taprosoft Jan 2, 2025
426d1ac
fix: disable arm build for demo
taprosoft Jan 2, 2025
563566f
fix: revise full-text search js logic
taprosoft Jan 2, 2025
f118e21
feat: add rate limit
taprosoft Jan 3, 2025
21e1ef9
fix: update Dockerfile with new launch script
taprosoft Jan 3, 2025
285e27d
fix: update Dockerfile
taprosoft Jan 3, 2025
e422964
fix: update Dockerignore
taprosoft Jan 3, 2025
5b8671b
fix: update ratelimit logic
taprosoft Jan 3, 2025
7a86d2b
fix: user_id in user management page
taprosoft Jan 3, 2025
ea37f0b
fix: rename conv logic
taprosoft Jan 3, 2025
d8dff11
feat: update demo hint
taprosoft Jan 3, 2025
8784c68
fix: minor fix
taprosoft Jan 3, 2025
33a45f1
fix: highlight on long PDF load
taprosoft Jan 4, 2025
0c44e05
feat: add HF paper list
taprosoft Jan 5, 2025
4f6aed2
fix: update HF papers load logic
taprosoft Jan 5, 2025
86dd5e5
feat: fly config
taprosoft Jan 6, 2025
8de8f8b
fix: update fly config
taprosoft Jan 6, 2025
f0ec683
fix: update paper list pull api
taprosoft Jan 7, 2025
5298bf0
fix: minor update root routing
taprosoft Jan 8, 2025
8cf65e4
fix: minor update root routing
taprosoft Jan 8, 2025
da5bdae
fix: simplify login flow & paper list UI
taprosoft Jan 16, 2025
9ee2f7d
feat: add paper recommendation
taprosoft Jan 17, 2025
c468bab
fix: update Dockerfile
taprosoft Jan 20, 2025
95aba5c
fix: update Dockerfile
taprosoft Jan 20, 2025
bff35d0
fix: update default model
taprosoft Jan 21, 2025
24bcf45
feat: add long context Ollama through LCOllama
taprosoft Jan 21, 2025
1faa94b
feat: espose Gradio share to env
taprosoft Jan 21, 2025
e681d13
fix: revert customized changes
taprosoft Jan 27, 2025
906c6fc
fix: list group at app load
taprosoft Feb 1, 2025
9a3c064
fix: relocate share conv button
taprosoft Feb 1, 2025
af6399b
fix: update launch script
taprosoft Feb 2, 2025
f7b0a22
fix: update Docker CI
taprosoft Feb 2, 2025
2246c00
feat: add Ollama model selection at first setup
taprosoft Feb 2, 2025
5f8aa37
docs: update README
taprosoft Feb 2, 2025
2 changes: 2 additions & 0 deletions .dockerignore
@@ -11,3 +11,5 @@ env/
README.md
*.zip
*.sh

!/launch.sh
8 changes: 3 additions & 5 deletions .env.example
@@ -3,8 +3,8 @@
# settings for OpenAI
OPENAI_API_BASE=https://api.openai.com/v1
OPENAI_API_KEY=<YOUR_OPENAI_KEY>
OPENAI_CHAT_MODEL=gpt-3.5-turbo
OPENAI_EMBEDDINGS_MODEL=text-embedding-ada-002
OPENAI_CHAT_MODEL=gpt-4o-mini
OPENAI_EMBEDDINGS_MODEL=text-embedding-3-large

# settings for Azure OpenAI
AZURE_OPENAI_ENDPOINT=
@@ -17,10 +17,8 @@ AZURE_OPENAI_EMBEDDINGS_DEPLOYMENT=text-embedding-ada-002
COHERE_API_KEY=<COHERE_API_KEY>

# settings for local models
LOCAL_MODEL=llama3.1:8b
LOCAL_MODEL=qwen2.5:7b
LOCAL_MODEL_EMBEDDINGS=nomic-embed-text
LOCAL_EMBEDDING_MODEL_DIM = 768
LOCAL_EMBEDDING_MODEL_MAX_TOKENS = 8192

# settings for GraphRAG
GRAPHRAG_API_KEY=<YOUR_OPENAI_KEY>
1 change: 1 addition & 0 deletions .github/workflows/build-push-docker.yaml
@@ -28,6 +28,7 @@ jobs:
target:
- lite
- full
- ollama
steps:
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
18 changes: 18 additions & 0 deletions .github/workflows/fly-deploy.yml
@@ -0,0 +1,18 @@
# See https://fly.io/docs/app-guides/continuous-deployment-with-github-actions/

name: Fly Deploy
on:
push:
branches:
- main
jobs:
deploy:
name: Deploy app
runs-on: ubuntu-latest
concurrency: deploy-group # optional: ensure only one action runs at a time
steps:
- uses: actions/checkout@v4
- uses: superfly/flyctl-actions/setup-flyctl@master
- run: flyctl deploy --remote-only
env:
FLY_API_TOKEN: ${{ secrets.FLY_API_TOKEN }}
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -57,6 +57,7 @@ repos:
"types-requests",
"sqlmodel",
"types-Markdown",
"types-cachetools",
types-tzlocal,
]
args: ["--check-untyped-defs", "--ignore-missing-imports"]
19 changes: 15 additions & 4 deletions Dockerfile
@@ -35,6 +35,7 @@ RUN bash scripts/download_pdfjs.sh $PDFJS_PREBUILT_DIR

# Copy contents
COPY . /app
COPY launch.sh /app/launch.sh
COPY .env.example /app/.env

# Install pip packages
@@ -54,7 +55,7 @@ RUN apt-get autoremove \
&& rm -rf /var/lib/apt/lists/* \
&& rm -rf ~/.cache

CMD ["python", "app.py"]
ENTRYPOINT ["sh", "/app/launch.sh"]

# Full version
FROM lite AS full
@@ -97,7 +98,17 @@ RUN apt-get autoremove \
&& rm -rf /var/lib/apt/lists/* \
&& rm -rf ~/.cache

# Download nltk packages as required for unstructured
# RUN python -c "from unstructured.nlp.tokenize import _download_nltk_packages_if_not_present; _download_nltk_packages_if_not_present()"
ENTRYPOINT ["sh", "/app/launch.sh"]

CMD ["python", "app.py"]
# Ollama-bundled version
FROM full AS ollama

# Install ollama
RUN --mount=type=ssh \
--mount=type=cache,target=/root/.cache/pip \
curl -fsSL https://ollama.com/install.sh | sh

# RUN nohup bash -c "ollama serve &" && sleep 4 && ollama pull qwen2.5:7b
RUN nohup bash -c "ollama serve &" && sleep 4 && ollama pull nomic-embed-text

ENTRYPOINT ["sh", "/app/launch.sh"]
22 changes: 9 additions & 13 deletions README.md
@@ -96,18 +96,7 @@ documents and developers who want to build their own RAG pipeline.

### With Docker (recommended)

1. We support both `lite` & `full` version of Docker images. With `full`, the extra packages of `unstructured` will be installed as well, it can support additional file types (`.doc`, `.docx`, ...) but the cost is larger docker image size. For most users, the `lite` image should work well in most cases.

- To use the `lite` version.

```bash
docker run \
-e GRADIO_SERVER_NAME=0.0.0.0 \
-e GRADIO_SERVER_PORT=7860 \
-v ./ktem_app_data:/app/ktem_app_data \
-p 7860:7860 -it --rm \
ghcr.io/cinnamon/kotaemon:main-lite
```
1. We support both `lite` & `full` version of Docker images. With `full` version, the extra packages of `unstructured` will be installed, which can support additional file types (`.doc`, `.docx`, ...) but the cost is larger docker image size. For most users, the `lite` image should work well in most cases.

- To use the `full` version.

@@ -124,9 +113,16 @@

```bash
# change image name to
ghcr.io/cinnamon/kotaemon:feat-ollama_docker-full
docker run <...> ghcr.io/cinnamon/kotaemon:main-ollama
```

- To use the `lite` version.

```bash
# change image name to
docker run <...> ghcr.io/cinnamon/kotaemon:main-lite
```

2. We currently support and test two platforms: `linux/amd64` and `linux/arm64` (for newer Mac). You can specify the platform by passing `--platform` in the `docker run` command. For example:

```bash
2 changes: 2 additions & 0 deletions app.py
@@ -3,6 +3,7 @@
from theflow.settings import settings as flowsettings

KH_APP_DATA_DIR = getattr(flowsettings, "KH_APP_DATA_DIR", ".")
KH_GRADIO_SHARE = getattr(flowsettings, "KH_GRADIO_SHARE", False)
GRADIO_TEMP_DIR = os.getenv("GRADIO_TEMP_DIR", None)
# override GRADIO_TEMP_DIR if it's not set
if GRADIO_TEMP_DIR is None:
@@ -21,4 +22,5 @@
"libs/ktem/ktem/assets",
GRADIO_TEMP_DIR,
],
share=KH_GRADIO_SHARE,
)
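The `app.py` change above reads the new `KH_GRADIO_SHARE` flag with a `getattr` fallback, so a `flowsettings.py` that predates the flag keeps working. A minimal sketch of that pattern, using a hypothetical stand-in for `theflow.settings.settings`:

```python
import types

# Hypothetical stand-in for `theflow.settings.settings`; the real object
# is a settings module, but any attribute container shows the pattern.
flowsettings = types.SimpleNamespace(KH_APP_DATA_DIR="./ktem_app_data")

# Each optional flag is read with getattr so a missing attribute
# silently falls back to a safe default instead of raising.
KH_APP_DATA_DIR = getattr(flowsettings, "KH_APP_DATA_DIR", ".")
KH_GRADIO_SHARE = getattr(flowsettings, "KH_GRADIO_SHARE", False)  # undefined -> False

print(KH_APP_DATA_DIR, KH_GRADIO_SHARE)
```

The same default (`False`) is mirrored in `flowsettings.py` via `config("KH_GRADIO_SHARE", default=False, cast=bool)`, so the flag can be driven from the environment end to end.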
4 changes: 2 additions & 2 deletions docs/about.md
@@ -4,8 +4,8 @@ An open-source tool for chatting with your documents. Built with both end users
developers in mind.

[Source Code](https://github.com/Cinnamon/kotaemon) |
[Live Demo](https://huggingface.co/spaces/cin-model/kotaemon-demo)
[HF Space](https://huggingface.co/spaces/cin-model/kotaemon-demo)

[User Guide](https://cinnamon.github.io/kotaemon/) |
[Installation Guide](https://cinnamon.github.io/kotaemon/) |
[Developer Guide](https://cinnamon.github.io/kotaemon/development/) |
[Feedback](https://github.com/Cinnamon/kotaemon/issues)
2 changes: 1 addition & 1 deletion docs/online_install.md
@@ -1,7 +1,7 @@
## Installation (Online HuggingFace Space)

1. Go to [HF kotaemon_template](https://huggingface.co/spaces/cin-model/kotaemon_template).
2. Use Duplicate function to create your own space.
2. Use Duplicate function to create your own space. Or use this [direct link](https://huggingface.co/spaces/cin-model/kotaemon_template?duplicate=true).
![Duplicate space](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/duplicate_space.png)
![Change space params](https://raw.githubusercontent.com/Cinnamon/kotaemon/main/docs/images/change_space_params.png)
3. Wait for the build to complete and start up (apprx 10 mins).
55 changes: 38 additions & 17 deletions flowsettings.py
@@ -25,7 +25,8 @@
except Exception:
KH_APP_VERSION = "local"

KH_ENABLE_FIRST_SETUP = True
KH_GRADIO_SHARE = config("KH_GRADIO_SHARE", default=False, cast=bool)
KH_ENABLE_FIRST_SETUP = config("KH_ENABLE_FIRST_SETUP", default=True, cast=bool)
KH_DEMO_MODE = config("KH_DEMO_MODE", default=False, cast=bool)
KH_OLLAMA_URL = config("KH_OLLAMA_URL", default="http://localhost:11434/v1/")

@@ -65,6 +66,8 @@
KH_DOC_DIR = this_dir / "docs"

KH_MODE = "dev"
KH_SSO_ENABLED = config("KH_SSO_ENABLED", default=False, cast=bool)

KH_FEATURE_CHAT_SUGGESTION = config(
"KH_FEATURE_CHAT_SUGGESTION", default=False, cast=bool
)
@@ -137,43 +140,58 @@
"default": False,
}

if config("OPENAI_API_KEY", default=""):
OPENAI_DEFAULT = "<YOUR_OPENAI_KEY>"
OPENAI_API_KEY = config("OPENAI_API_KEY", default=OPENAI_DEFAULT)
GOOGLE_API_KEY = config("GOOGLE_API_KEY", default="your-key")
IS_OPENAI_DEFAULT = len(OPENAI_API_KEY) > 0 and OPENAI_API_KEY != OPENAI_DEFAULT

if OPENAI_API_KEY:
KH_LLMS["openai"] = {
"spec": {
"__type__": "kotaemon.llms.ChatOpenAI",
"temperature": 0,
"base_url": config("OPENAI_API_BASE", default="")
or "https://api.openai.com/v1",
"api_key": config("OPENAI_API_KEY", default=""),
"model": config("OPENAI_CHAT_MODEL", default="gpt-3.5-turbo"),
"api_key": OPENAI_API_KEY,
"model": config("OPENAI_CHAT_MODEL", default="gpt-4o-mini"),
"timeout": 20,
},
"default": True,
"default": IS_OPENAI_DEFAULT,
}
KH_EMBEDDINGS["openai"] = {
"spec": {
"__type__": "kotaemon.embeddings.OpenAIEmbeddings",
"base_url": config("OPENAI_API_BASE", default="https://api.openai.com/v1"),
"api_key": config("OPENAI_API_KEY", default=""),
"api_key": OPENAI_API_KEY,
"model": config(
"OPENAI_EMBEDDINGS_MODEL", default="text-embedding-ada-002"
"OPENAI_EMBEDDINGS_MODEL", default="text-embedding-3-large"
),
"timeout": 10,
"context_length": 8191,
},
"default": True,
"default": IS_OPENAI_DEFAULT,
}
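The new `IS_OPENAI_DEFAULT` flag above decides whether OpenAI becomes the default LLM/embedding provider: the key must be non-empty and must differ from the placeholder. A standalone sketch of that check:

```python
import os

# Sketch of the default-provider check introduced in flowsettings.py:
# OpenAI is only marked default when a real key is configured, i.e.
# the value is non-empty and not the "<YOUR_OPENAI_KEY>" placeholder.
OPENAI_DEFAULT = "<YOUR_OPENAI_KEY>"
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", OPENAI_DEFAULT)
IS_OPENAI_DEFAULT = len(OPENAI_API_KEY) > 0 and OPENAI_API_KEY != OPENAI_DEFAULT

print(IS_OPENAI_DEFAULT)
```

Downstream, the Google models use `not IS_OPENAI_DEFAULT`, so exactly one of the two providers ends up as the default.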

if config("LOCAL_MODEL", default=""):
KH_LLMS["ollama"] = {
"spec": {
"__type__": "kotaemon.llms.ChatOpenAI",
"base_url": KH_OLLAMA_URL,
"model": config("LOCAL_MODEL", default="llama3.1:8b"),
"model": config("LOCAL_MODEL", default="qwen2.5:7b"),
"api_key": "ollama",
},
"default": False,
}
KH_LLMS["ollama-long-context"] = {
"spec": {
"__type__": "kotaemon.llms.LCOllamaChat",
"base_url": KH_OLLAMA_URL.replace("v1/", ""),
"model": config("LOCAL_MODEL", default="qwen2.5:7b"),
"num_ctx": 8192,
},
"default": False,
}

KH_EMBEDDINGS["ollama"] = {
"spec": {
"__type__": "kotaemon.embeddings.OpenAIEmbeddings",
@@ -183,7 +201,6 @@
},
"default": False,
}

KH_EMBEDDINGS["fast_embed"] = {
"spec": {
"__type__": "kotaemon.embeddings.FastEmbedEmbeddings",
@@ -205,9 +222,9 @@
"spec": {
"__type__": "kotaemon.llms.chats.LCGeminiChat",
"model_name": "gemini-1.5-flash",
"api_key": config("GOOGLE_API_KEY", default="your-key"),
"api_key": GOOGLE_API_KEY,
},
"default": False,
"default": not IS_OPENAI_DEFAULT,
}
KH_LLMS["groq"] = {
"spec": {
@@ -241,8 +258,9 @@
"spec": {
"__type__": "kotaemon.embeddings.LCGoogleEmbeddings",
"model": "models/text-embedding-004",
"google_api_key": config("GOOGLE_API_KEY", default="your-key"),
}
"google_api_key": GOOGLE_API_KEY,
},
"default": not IS_OPENAI_DEFAULT,
}
# KH_EMBEDDINGS["huggingface"] = {
# "spec": {
@@ -301,9 +319,12 @@

USE_NANO_GRAPHRAG = config("USE_NANO_GRAPHRAG", default=False, cast=bool)
USE_LIGHTRAG = config("USE_LIGHTRAG", default=True, cast=bool)
USE_MS_GRAPHRAG = config("USE_MS_GRAPHRAG", default=True, cast=bool)

GRAPHRAG_INDEX_TYPES = ["ktem.index.file.graph.GraphRAGIndex"]
GRAPHRAG_INDEX_TYPES = []

if USE_MS_GRAPHRAG:
GRAPHRAG_INDEX_TYPES.append("ktem.index.file.graph.GraphRAGIndex")
if USE_NANO_GRAPHRAG:
GRAPHRAG_INDEX_TYPES.append("ktem.index.file.graph.NanoGraphRAGIndex")
if USE_LIGHTRAG:
@@ -323,7 +344,7 @@
".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
),
"private": False,
"private": True,
},
"index_type": graph_type,
}
@@ -338,7 +359,7 @@
".png, .jpeg, .jpg, .tiff, .tif, .pdf, .xls, .xlsx, .doc, .docx, "
".pptx, .csv, .html, .mhtml, .txt, .md, .zip"
),
"private": False,
"private": True,
},
"index_type": "ktem.index.file.FileIndex",
},
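The `flowsettings.py` diff also rebuilds `GRAPHRAG_INDEX_TYPES` from per-engine feature flags instead of hard-coding one entry. A sketch of that flag-driven list construction (the LightRAG class path is not visible in the excerpt above, so the name here is an assumption inferred from the flag):

```python
# Sketch of the feature-flag pattern for GraphRAG index types:
# each USE_* flag appends its index class path to the list.
def graphrag_index_types(use_ms=True, use_nano=False, use_light=True):
    types = []
    if use_ms:
        types.append("ktem.index.file.graph.GraphRAGIndex")
    if use_nano:
        types.append("ktem.index.file.graph.NanoGraphRAGIndex")
    if use_light:
        # class path assumed from the USE_LIGHTRAG flag; not shown in the diff
        types.append("ktem.index.file.graph.LightRAGIndex")
    return types

print(graphrag_index_types())
```

With the defaults in the diff (`USE_MS_GRAPHRAG=True`, `USE_NANO_GRAPHRAG=False`, `USE_LIGHTRAG=True`), two index types are registered.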
26 changes: 26 additions & 0 deletions fly.toml
@@ -0,0 +1,26 @@
# fly.toml app configuration file generated for kotaemon on 2024-12-24T20:56:32+07:00
#
# See https://fly.io/docs/reference/configuration/ for information about how to use this file.
#

app = 'kotaemon'
primary_region = 'sin'

[build]

[mounts]
destination = "/app/ktem_app_data"
source = "ktem_volume"

[http_service]
internal_port = 7860
force_https = true
auto_stop_machines = 'suspend'
auto_start_machines = true
min_machines_running = 0
processes = ['app']

[[vm]]
memory = '4gb'
cpu_kind = 'shared'
cpus = 4
23 changes: 23 additions & 0 deletions launch.sh
@@ -0,0 +1,23 @@
#!/bin/bash

if [ -z "$GRADIO_SERVER_NAME" ]; then
export GRADIO_SERVER_NAME="0.0.0.0"
fi
if [ -z "$GRADIO_SERVER_PORT" ]; then
export GRADIO_SERVER_PORT="7860"
fi

# Check if environment variable KH_DEMO_MODE is set to true
if [ "$KH_DEMO_MODE" = "true" ]; then
echo "KH_DEMO_MODE is true. Launching in demo mode..."
# Command to launch in demo mode
GR_FILE_ROOT_PATH="/app" KH_FEATURE_USER_MANAGEMENT=false USE_LIGHTRAG=false uvicorn sso_app_demo:app --host "$GRADIO_SERVER_NAME" --port "$GRADIO_SERVER_PORT"
else
if [ "$KH_SSO_ENABLED" = "true" ]; then
echo "KH_SSO_ENABLED is true. Launching in SSO mode..."
GR_FILE_ROOT_PATH="/app" KH_SSO_ENABLED=true uvicorn sso_app:app --host "$GRADIO_SERVER_NAME" --port "$GRADIO_SERVER_PORT"
else
ollama serve &
python app.py
fi
fi
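The new `launch.sh` dispatches on two environment variables, with demo mode taking precedence over SSO. The branching logic, sketched in Python for illustration:

```python
# Sketch (Python, for illustration) of the mode dispatch launch.sh
# performs: KH_DEMO_MODE wins over KH_SSO_ENABLED, and the fallback
# is the standard `ollama serve & python app.py` launch.
def select_launch_mode(env):
    if env.get("KH_DEMO_MODE") == "true":
        return "demo"      # uvicorn sso_app_demo:app
    if env.get("KH_SSO_ENABLED") == "true":
        return "sso"       # uvicorn sso_app:app
    return "standard"      # ollama serve & python app.py

print(select_launch_mode({"KH_DEMO_MODE": "true"}))  # demo
```

Note the precedence: setting both `KH_DEMO_MODE=true` and `KH_SSO_ENABLED=true` launches the demo app, because the SSO check is nested inside the demo-mode `else` branch.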