Skip to content

Commit

Permalink
Merge branch 'infiniflow:main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
isthaison authored Nov 22, 2024
2 parents 918899e + 8750963 commit 42936b1
Show file tree
Hide file tree
Showing 26 changed files with 157 additions and 132 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,13 @@ jobs:
- name: Build ragflow:dev-slim
run: |
RUNNER_WORKSPACE_PREFIX=${RUNNER_WORKSPACE_PREFIX:-$HOME}
cp -r ${RUNNER_WORKSPACE_PREFIX}/huggingface.co ${RUNNER_WORKSPACE_PREFIX}/nltk_data ${RUNNER_WORKSPACE_PREFIX}/libssl*.deb ${RUNNER_WORKSPACE_PREFIX}/tika-server*.jar* .
sudo docker pull ubuntu:24.04
sudo ./build_docker_image.sh slim
cp -r ${RUNNER_WORKSPACE_PREFIX}/huggingface.co ${RUNNER_WORKSPACE_PREFIX}/nltk_data ${RUNNER_WORKSPACE_PREFIX}/libssl*.deb ${RUNNER_WORKSPACE_PREFIX}/tika-server*.jar* ${RUNNER_WORKSPACE_PREFIX}/chrome* ${RUNNER_WORKSPACE_PREFIX}/cl100k_base.tiktoken .
sudo docker pull ubuntu:22.04
sudo docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
- name: Build ragflow:dev
run: |
sudo ./build_docker_image.sh full
sudo docker build -f Dockerfile -t infiniflow/ragflow:dev .
- name: Start ragflow:dev-slim
run: |
Expand Down
35 changes: 29 additions & 6 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# base stage
FROM ubuntu:22.04 AS base
USER root
SHELL ["/bin/bash", "-c"]

ARG ARCH=amd64
ENV LIGHTEN=0

WORKDIR /ragflow
Expand All @@ -18,7 +18,7 @@ RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g'

RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus default-jdk python3-pip pipx \
libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils unzip libgbm-dev wget \
libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils unzip libgbm-dev wget git \
&& rm -rf /var/lib/apt/lists/*

RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && pip3 config set global.trusted-host "pypi.tuna.tsinghua.edu.cn mirrors.pku.edu.cn" && pip3 config set global.extra-index-url "https://mirrors.pku.edu.cn/pypi/web/simple" \
Expand All @@ -28,8 +28,11 @@ RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple &&
# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
# aspose-slides on linux/arm64 is unavailable
RUN --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_amd64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb \
if [ "${ARCH}" = "amd64" ]; then \
--mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_arm64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb \
if [ "$(uname -m)" = "x86_64" ]; then \
dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
elif [ "$(uname -m)" = "aarch64" ]; then \
dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \
fi

ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
Expand All @@ -56,6 +59,24 @@ USER root

WORKDIR /ragflow

COPY .git /ragflow/.git

# Derive a human-readable version string from the copied .git metadata and
# store it in /ragflow/VERSION:
#   - exactly on a tag          -> "<tag>"
#   - N commits after a tag     -> "<short-sha>(<tag>~N)"
#   - no reachable tag at all   -> "<short-sha>" (e.g. shallow or tagless clone)
# A " slim" or " full" suffix is appended depending on LIGHTEN.
# git failures are handled explicitly so the file never contains a malformed
# value such as "abc123(~) full".
RUN current_commit=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown"); \
    last_tag=$(git describe --tags --abbrev=0 2>/dev/null || true); \
    if [ -z "$last_tag" ]; then \
        version_info="$current_commit"; \
    else \
        commit_count=$(git rev-list --count "$last_tag..HEAD"); \
        if [ "$commit_count" -eq 0 ]; then \
            version_info="$last_tag"; \
        else \
            version_info="$current_commit($last_tag~$commit_count)"; \
        fi; \
    fi; \
    if [ "$LIGHTEN" == "1" ]; then \
        version_info="$version_info slim"; \
    else \
        version_info="$version_info full"; \
    fi; \
    echo "$version_info" > /ragflow/VERSION

COPY web web
COPY docs docs
RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \
Expand All @@ -65,10 +86,10 @@ RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \
COPY pyproject.toml poetry.toml poetry.lock ./

RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
if [ "$LIGHTEN" -eq 0 ]; then \
poetry install --no-root --with=full; \
else \
if [ "$LIGHTEN" == "1" ]; then \
poetry install --no-root; \
else \
poetry install --no-root --with=full; \
fi

# production stage
Expand All @@ -77,6 +98,8 @@ USER root

WORKDIR /ragflow

COPY --from=builder /ragflow/VERSION /ragflow/VERSION

# Install python packages' dependencies
# cv2 requires libGL.so.1
RUN --mount=type=cache,id=ragflow_production_apt,target=/var/cache/apt,sharing=locked \
Expand Down
35 changes: 29 additions & 6 deletions Dockerfile.slim
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# base stage
FROM ubuntu:22.04 AS base
USER root
SHELL ["/bin/bash", "-c"]

ARG ARCH=amd64
ENV LIGHTEN=1

WORKDIR /ragflow
Expand All @@ -18,7 +18,7 @@ RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g'

RUN --mount=type=cache,id=ragflow_base_apt,target=/var/cache/apt,sharing=locked \
apt update && DEBIAN_FRONTEND=noninteractive apt install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 pkg-config libicu-dev libgdiplus default-jdk python3-pip pipx \
libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils unzip libgbm-dev wget \
libatk-bridge2.0-0 libgtk-4-1 libnss3 xdg-utils unzip libgbm-dev wget git \
&& rm -rf /var/lib/apt/lists/*

RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple && pip3 config set global.trusted-host "pypi.tuna.tsinghua.edu.cn mirrors.pku.edu.cn" && pip3 config set global.extra-index-url "https://mirrors.pku.edu.cn/pypi/web/simple" \
Expand All @@ -28,8 +28,11 @@ RUN pip3 config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple &&
# https://forum.aspose.com/t/aspose-slides-for-net-no-usable-version-of-libssl-found-with-linux-server/271344/13
# aspose-slides on linux/arm64 is unavailable
RUN --mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_amd64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb \
if [ "${ARCH}" = "amd64" ]; then \
--mount=type=bind,source=libssl1.1_1.1.1f-1ubuntu2_arm64.deb,target=/root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb \
if [ "$(uname -m)" = "x86_64" ]; then \
dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_amd64.deb; \
elif [ "$(uname -m)" = "aarch64" ]; then \
dpkg -i /root/libssl1.1_1.1.1f-1ubuntu2_arm64.deb; \
fi

ENV PYTHONDONTWRITEBYTECODE=1 DOTNET_SYSTEM_GLOBALIZATION_INVARIANT=1
Expand All @@ -56,6 +59,24 @@ USER root

WORKDIR /ragflow

COPY .git /ragflow/.git

# Derive a human-readable version string from the copied .git metadata and
# store it in /ragflow/VERSION:
#   - exactly on a tag          -> "<tag>"
#   - N commits after a tag     -> "<short-sha>(<tag>~N)"
#   - no reachable tag at all   -> "<short-sha>" (e.g. shallow or tagless clone)
# A " slim" or " full" suffix is appended depending on LIGHTEN.
# git failures are handled explicitly so the file never contains a malformed
# value such as "abc123(~) slim".
RUN current_commit=$(git rev-parse --short HEAD 2>/dev/null || echo "unknown"); \
    last_tag=$(git describe --tags --abbrev=0 2>/dev/null || true); \
    if [ -z "$last_tag" ]; then \
        version_info="$current_commit"; \
    else \
        commit_count=$(git rev-list --count "$last_tag..HEAD"); \
        if [ "$commit_count" -eq 0 ]; then \
            version_info="$last_tag"; \
        else \
            version_info="$current_commit($last_tag~$commit_count)"; \
        fi; \
    fi; \
    if [ "$LIGHTEN" == "1" ]; then \
        version_info="$version_info slim"; \
    else \
        version_info="$version_info full"; \
    fi; \
    echo "$version_info" > /ragflow/VERSION

COPY web web
COPY docs docs
RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \
Expand All @@ -65,10 +86,10 @@ RUN --mount=type=cache,id=ragflow_builder_npm,target=/root/.npm,sharing=locked \
COPY pyproject.toml poetry.toml poetry.lock ./

RUN --mount=type=cache,id=ragflow_builder_poetry,target=/root/.cache/pypoetry,sharing=locked \
if [ "$LIGHTEN" -eq 0 ]; then \
poetry install --no-root --with=full; \
else \
if [ "$LIGHTEN" == "1" ]; then \
poetry install --no-root; \
else \
poetry install --no-root --with=full; \
fi

# production stage
Expand All @@ -77,6 +98,8 @@ USER root

WORKDIR /ragflow

COPY --from=builder /ragflow/VERSION /ragflow/VERSION

# Install python packages' dependencies
# cv2 requires libGL.so.1
RUN --mount=type=cache,id=ragflow_production_apt,target=/var/cache/apt,sharing=locked \
Expand Down
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ Try our demo at [https://demo.ragflow.io](https://demo.ragflow.io).

## 🔥 Latest Updates

- 2024-11-01 Adds keyword extraction and related question generation to the parsed chunk to improve the accuracy of retrieval.
- 2024-11-22 Adds more variables to Agent.
- 2024-11-01 Adds keyword extraction and related question generation to the parsed chunks to improve the accuracy of retrieval.
- 2024-09-13 Adds search mode for knowledge base Q&A.
- 2024-09-09 Adds a medical consultant agent template.
- 2024-08-22 Supports text-to-SQL statements through RAG.
- 2024-08-02 Supports GraphRAG inspired by [graphrag](https://github.com/microsoft/graphrag) and mind map.

Expand Down Expand Up @@ -274,7 +274,7 @@ git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
pip3 install huggingface-hub nltk
python3 download_deps.py
bash build_docker_image.sh slim
docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
```
## 🔧 Build a Docker image including embedding models
Expand All @@ -286,7 +286,7 @@ git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
pip3 install huggingface-hub nltk
python3 download_deps.py
bash build_docker_image.sh full
docker build -f Dockerfile -t infiniflow/ragflow:dev .
```
## 🔨 Launch service from source for development
Expand Down
6 changes: 3 additions & 3 deletions README_id.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ Coba demo kami di [https://demo.ragflow.io](https://demo.ragflow.io).

## 🔥 Pembaruan Terbaru

- 2024-11-22: Peningkatan definisi dan penggunaan variabel di Agen.
- 2024-11-01: Penambahan ekstraksi kata kunci dan pembuatan pertanyaan terkait untuk meningkatkan akurasi pengambilan.
- 2024-09-13: Penambahan mode pencarian untuk Q&A basis pengetahuan.
- 2024-09-09: Penambahan template agen konsultan medis.
- 2024-08-22: Dukungan untuk teks ke pernyataan SQL melalui RAG.
- 2024-08-02: Dukungan GraphRAG yang terinspirasi oleh [graphrag](https://github.com/microsoft/graphrag) dan mind map.

Expand Down Expand Up @@ -249,7 +249,7 @@ git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
pip3 install huggingface-hub nltk
python3 download_deps.py
bash build_docker_image.sh slim
docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
```
## 🔧 Membangun Docker Image Termasuk Model Embedding
Expand All @@ -261,7 +261,7 @@ git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
pip3 install huggingface-hub nltk
python3 download_deps.py
bash build_docker_image.sh full
docker build -f Dockerfile -t infiniflow/ragflow:dev .
```

## 🔨 Menjalankan Aplikasi dari Sumber untuk Pengembangan
Expand Down
6 changes: 3 additions & 3 deletions README_ja.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@

## 🔥 最新情報

- 2024-11-22 エージェントでの変数の定義と使用法を改善しました。
- 2024-11-01 再現の精度を向上させるために、解析されたチャンクにキーワード抽出と関連質問の生成を追加しました。
- 2024-09-13 ナレッジベース Q&A の検索モードを追加しました。
- 2024-09-09 エージェントに医療相談テンプレートを追加しました。
- 2024-08-22 RAG を介して SQL ステートメントへのテキストをサポートします。
- 2024-08-02 [graphrag](https://github.com/microsoft/graphrag) からインスピレーションを得た GraphRAG とマインド マップをサポートします。

Expand Down Expand Up @@ -230,7 +230,7 @@ git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
pip3 install huggingface-hub nltk
python3 download_deps.py
bash build_docker_image.sh slim
docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
```

## 🔧 ソースコードをコンパイルしたDockerイメージ(埋め込みモデルを含む)
Expand All @@ -242,7 +242,7 @@ git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
pip3 install huggingface-hub nltk
python3 download_deps.py
bash build_docker_image.sh full
docker build -f Dockerfile -t infiniflow/ragflow:dev .
```

## 🔨 ソースコードからサービスを起動する方法
Expand Down
8 changes: 4 additions & 4 deletions README_ko.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@

## 🔥 업데이트

- 2024-11-22 에이전트의 변수 정의 및 사용을 개선했습니다.

- 2024-11-01 파싱된 청크에 키워드 추출 및 관련 질문 생성을 추가하여 재현율을 향상시킵니다.

- 2024-09-13 지식베이스 Q&A 검색 모드를 추가합니다.

- 2024-09-09 Agent에 의료상담 템플릿을 추가하였습니다.

- 2024-08-22 RAG를 통해 SQL 문에 텍스트를 지원합니다.

- 2024-08-02: [graphrag](https://github.com/microsoft/graphrag)와 마인드맵에서 영감을 받은 GraphRAG를 지원합니다.
Expand Down Expand Up @@ -232,7 +232,7 @@ git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
pip3 install huggingface-hub nltk
python3 download_deps.py
bash build_docker_image.sh slim
docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
```

## 🔧 소스 코드로 Docker 이미지를 컴파일합니다(임베딩 모델 포함)
Expand All @@ -244,7 +244,7 @@ git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
pip3 install huggingface-hub nltk
python3 download_deps.py
bash build_docker_image.sh full
docker build -f Dockerfile -t infiniflow/ragflow:dev .
```

## 🔨 소스 코드로 서비스를 시작합니다.
Expand Down
8 changes: 4 additions & 4 deletions README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@

## 🔥 近期更新

- 2024-11-01 对解析后的chunk加入关键词抽取和相关问题生成以提高召回的准确度。
- 2024-11-22 完善了 Agent 中的变量定义和使用。
- 2024-11-01 对解析后的 chunk 加入关键词抽取和相关问题生成以提高召回的准确度。
- 2024-09-13 增加知识库问答搜索模式。
- 2024-09-09 在 Agent 中加入医疗问诊模板。
- 2024-08-22 支持用 RAG 技术实现从自然语言到 SQL 语句的转换。
- 2024-08-02 支持 GraphRAG 启发于 [graphrag](https://github.com/microsoft/graphrag) 和思维导图。

Expand Down Expand Up @@ -237,7 +237,7 @@ git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
pip3 install huggingface-hub nltk
python3 download_deps.py
bash build_docker_image.sh slim
docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
```

## 🔧 源码编译 Docker 镜像(包含 embedding 模型)
Expand All @@ -249,7 +249,7 @@ git clone https://github.com/infiniflow/ragflow.git
cd ragflow/
pip3 install huggingface-hub nltk
python3 download_deps.py
bash build_docker_image.sh full
docker build -f Dockerfile -t infiniflow/ragflow:dev .
```

## 🔨 以源代码启动服务
Expand Down
49 changes: 35 additions & 14 deletions api/versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,51 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import subprocess

def get_ragflow_version() -> str:
return RAGFLOW_VERSION_INFO
RAGFLOW_VERSION_INFO = "unknown"


RAGFLOW_VERSION_INFO = "dev"
def get_ragflow_version() -> str:
    """Return the RAGFlow version string, computing and caching it on first use.

    Resolution order:
      1. If the module-level ``RAGFLOW_VERSION_INFO`` has already been resolved
         (anything other than ``"unknown"``), return it as-is.
      2. Otherwise read a ``VERSION`` file located one directory above this
         module and use its stripped content when it is non-empty.
         (Presumably this is the file written during the Docker build; it is
         expected to already carry the " slim"/" full" suffix -- confirm.)
      3. Otherwise fall back to ``get_closest_tag_and_count()`` and append
         " slim" when the ``LIGHTEN`` environment variable equals 1, or
         " full" in every other case.

    Returns:
        The resolved version string, which is also stored in
        ``RAGFLOW_VERSION_INFO``.
    """
    global RAGFLOW_VERSION_INFO
    if RAGFLOW_VERSION_INFO != "unknown":
        return RAGFLOW_VERSION_INFO
    version_path = os.path.abspath(
        os.path.join(
            os.path.dirname(os.path.realpath(__file__)), os.pardir, "VERSION"
        )
    )
    version_from_file = ""
    if os.path.exists(version_path):
        with open(version_path, "r", encoding="utf-8") as f:
            version_from_file = f.read().strip()
    if version_from_file:
        RAGFLOW_VERSION_INFO = version_from_file
    else:
        # No usable VERSION file (missing or empty): derive the version from git.
        RAGFLOW_VERSION_INFO = get_closest_tag_and_count()
        try:
            lighten = int(os.environ.get("LIGHTEN", "0"))
        except ValueError:
            # A non-numeric LIGHTEN value must not crash version reporting;
            # treat it like the default (full image).
            lighten = 0
        RAGFLOW_VERSION_INFO += " slim" if lighten == 1 else " full"
    return RAGFLOW_VERSION_INFO


def get_closest_tag_and_count():
try:
# Get the current commit hash
commit_id = subprocess.check_output(['git', 'rev-parse', '--short', 'HEAD']).strip().decode('utf-8')
commit_id = (
subprocess.check_output(["git", "rev-parse", "--short", "HEAD"])
.strip()
.decode("utf-8")
)
# Get the closest tag
closest_tag = subprocess.check_output(['git', 'describe', '--tags', '--abbrev=0']).strip().decode('utf-8')
# Get the commit hash of the closest tag
closest_tag_commit = subprocess.check_output(['git', 'rev-list', '-n', '1', closest_tag]).strip().decode(
'utf-8')
closest_tag = (
subprocess.check_output(["git", "describe", "--tags", "--abbrev=0"])
.strip()
.decode("utf-8")
)
# Get the commit count since the closest tag
process = subprocess.Popen(['git', 'rev-list', '--count', f'{closest_tag}..HEAD'], stdout=subprocess.PIPE)
process = subprocess.Popen(
["git", "rev-list", "--count", f"{closest_tag}..HEAD"],
stdout=subprocess.PIPE,
)
commits_count, _ = process.communicate()
commits_count = int(commits_count.strip())

Expand All @@ -41,8 +66,4 @@ def get_closest_tag_and_count():
else:
return f"{commit_id}({closest_tag}~{commits_count})"
except Exception:
return 'unknown'


if RAGFLOW_VERSION_INFO == 'dev':
RAGFLOW_VERSION_INFO = get_closest_tag_and_count()
return "unknown"
Loading

0 comments on commit 42936b1

Please sign in to comment.