Skip to content

Commit

Permalink
Doc for ragflow:dev
Browse files Browse the repository at this point in the history
  • Loading branch information
yuzhichang committed Sep 28, 2024
1 parent 1dd838e commit c4cfd8a
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 60 deletions.
113 changes: 99 additions & 14 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,23 +1,108 @@
# NOTE(review): this text looks like a rendered diff without +/- markers, so the
# FROM/USER pair directly below and the "base stage" FROM/USER pair after it are
# probably the old and new versions of the same lines -- confirm against the
# real Dockerfile before relying on either.
FROM infiniflow/ragflow-base:v2.0
USER root
# base stage
FROM ubuntu:24.04 AS base
USER root

# LIGHTEN is read by the builder stage's Poetry install step: when it is 0 the
# extra "full" dependency group is installed as well.
ENV LIGHTEN=0

WORKDIR /ragflow

# NOTE(review): the same web build is also performed in the builder stage
# (after nodejs/npm are installed there); these two lines are presumably the
# removed side of the diff -- confirm.
ADD ./web ./web
RUN cd ./web && npm i --force && npm run build
# Keep downloaded .deb files: remove the docker-clean APT hook and tell APT to
# retain packages, so the /var/cache/apt cache mounts used by the RUN steps
# can actually be reused between builds.
RUN rm -f /etc/apt/apt.conf.d/docker-clean \
    && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache

# Install CA certificates before anything else.
# NOTE(review): presumably needed because the mirror configured afterwards is
# reached over HTTPS -- confirm.
# apt-get is used instead of apt: apt's command-line interface is meant for
# interactive use and is not guaranteed to stay stable for scripts.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    apt-get update && apt-get --no-install-recommends install -y ca-certificates

# If you are located in China, the Tsinghua mirror can speed up apt.
# NOTE(review): this rewrite of ubuntu.sources runs unconditionally, so every
# build uses the mirror regardless of location -- confirm that is intended.
RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources

# Install the OS packages shared by later stages, then install Poetry with the
# official installer script.
# The installer is downloaded to a file with `curl -f` instead of being piped
# into python3: with a pipe and no pipefail, a failed or truncated download
# hands python3 an empty/partial script and the step can still succeed.
# apt-get replaces apt because apt's CLI is not intended for scripted use.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    apt-get update && apt-get install -y curl libpython3-dev nginx libglib2.0-0 libglx-mesa0 \
    && rm -rf /var/lib/apt/lists/* \
    && curl -fsSL https://install.python-poetry.org -o /tmp/install-poetry.py \
    && python3 /tmp/install-poetry.py \
    && rm -f /tmp/install-poetry.py

# PYTHONDONTWRITEBYTECODE=1 stops Python from writing .pyc files.
# The OpenMPI library directory is prepended to the dynamic loader search path.
# It must be an absolute path: the previous value lacked the leading "/", which
# made it a relative entry resolved against the process's working directory.
# NOTE(review): the x86_64 multiarch directory is hard-coded; arm64 images
# would need a different directory -- confirm.
ENV PYTHONDONTWRITEBYTECODE=1 LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/openmpi/lib:$LD_LIBRARY_PATH

# Poetry configuration, grouped in a single instruction:
# non-interactive runs, a virtualenv created inside the project directory,
# and a 15 second timeout for Poetry's HTTP requests.
ENV POETRY_NO_INTERACTION=1 \
    POETRY_VIRTUALENVS_IN_PROJECT=true \
    POETRY_VIRTUALENVS_CREATE=true \
    POETRY_REQUESTS_TIMEOUT=15

# builder stage: holds the build-only toolchain (Node.js, npm, cargo) and
# produces the compiled web assets.
FROM base AS builder
USER root

WORKDIR /ragflow

# apt-get replaces apt because apt's CLI is not intended for scripted use.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    apt-get update && apt-get install -y nodejs npm cargo && \
    rm -rf /var/lib/apt/lists/*

# Build the web front end. WORKDIR is used instead of `RUN cd ...` and is
# restored afterwards so later instructions still run from /ragflow.
COPY web web
WORKDIR /ragflow/web
RUN npm i --force && npm run build
WORKDIR /ragflow

# Install the Python dependencies pinned in poetry.lock (project itself is not
# installed because of --no-root).
COPY pyproject.toml poetry.toml poetry.lock ./

# When LIGHTEN is 0 the optional "full" dependency group is added; any other
# value installs only the default groups.
# $poetry_groups is deliberately left unquoted so an empty value adds no argument.
RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
    poetry_groups=""; \
    if [ "$LIGHTEN" -eq 0 ]; then poetry_groups="--with=full"; fi; \
    /root/.local/bin/poetry install --sync --no-cache --no-root $poetry_groups

# production stage: the runtime image, built on the shared base stage.
FROM base AS production
USER root

WORKDIR /ragflow

# Install python packages' dependencies
# cv2 requires libGL.so.1
# apt-get replaces apt because apt's CLI is not intended for scripted use.
# NOTE(review): nginx is also installed in the base stage, and vim/less are
# interactive tools -- confirm they are wanted in the runtime image.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    apt-get update && apt-get install -y --no-install-recommends nginx libgl1 vim less && \
    rm -rf /var/lib/apt/lists/*

# Copy the application source directories and the Poetry project files from
# the build context into /ragflow (the current WORKDIR). Each directory needs
# its own COPY so that it lands under a directory of the same name.
COPY web web
COPY api api
COPY conf conf
COPY deepdoc deepdoc
COPY rag rag
COPY agent agent
COPY graphrag graphrag
COPY pyproject.toml poetry.toml poetry.lock ./

# Copy models downloaded via download_deps.py
# The huggingface.io directory from the build context is bind-mounted only for
# the duration of each RUN step; files are transferred into the image through
# a tar-to-tar pipe rather than a COPY instruction.
RUN mkdir -p /ragflow/rag/res/deepdoc /root/.ragflow
# Hidden entries (names starting with ".") are excluded and three leading path
# components are stripped on extraction.
# NOTE(review): presumably this places the files of both InfiniFlow repos
# directly inside /ragflow/rag/res/deepdoc -- confirm.
RUN --mount=type=bind,source=huggingface.io,target=/huggingface.io \
tar --exclude='.*' -cf - \
/huggingface.io/InfiniFlow/text_concat_xgb_v1.0 \
/huggingface.io/InfiniFlow/deepdoc \
| tar -xf - --strip-components=2 -C /ragflow/rag/res/deepdoc
# Two leading path components are stripped for the models listed below.
# NOTE(review): presumably this leaves one <repo-name>/ directory per model
# under /root/.ragflow -- confirm against the code that loads them.
RUN --mount=type=bind,source=huggingface.io,target=/huggingface.io \
tar -cf - \
/huggingface.io/BAAI/bge-large-zh-v1.5 \
/huggingface.io/BAAI/bge-reranker-v2-m3 \
/huggingface.io/jinaai/jina-embeddings-v3 \
/huggingface.io/jinaai/jina-reranker-v2-base-multilingual \
/huggingface.io/maidalun1020/bce-embedding-base_v1 \
/huggingface.io/maidalun1020/bce-reranker-base_v1 \
| tar -xf - --strip-components=2 -C /root/.ragflow

# Copy compiled web pages
# Only the build output (web/dist) is taken from the builder stage.
COPY --from=builder /ragflow/web/dist /ragflow/web/dist

# Copy Python environment and packages
# The virtualenv is copied from the builder stage to the same path, and its
# bin directory (followed by /root/.local/bin) is put first on PATH.
ENV VIRTUAL_ENV=/ragflow/.venv
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:/root/.local/bin:${PATH}"

# NOTE(review): these ADD instructions copy the same six directories that the
# COPY instructions earlier in this stage already copy; they are presumably the
# removed side of the rendered diff -- confirm and drop them if so.
ADD ./api ./api
ADD ./conf ./conf
ADD ./deepdoc ./deepdoc
ADD ./rag ./rag
ADD ./agent ./agent
ADD ./graphrag ./graphrag
# Fetch the NLTK resources (wordnet, punkt, punkt_tab) at build time using the
# python3 found on PATH.
RUN python3 -m nltk.downloader wordnet punkt punkt_tab

# Runtime environment: make /ragflow importable and point Hugging Face
# downloads at the hf-mirror.com endpoint.
ENV PYTHONPATH=/ragflow/ \
    HF_ENDPOINT=https://hf-mirror.com

# NOTE(review): entrypoint.sh is copied twice below (ADD, then COPY to the same
# path) and ENTRYPOINT is declared twice; these look like the old and new sides
# of a rendered diff -- confirm and keep one of each.
ADD docker/entrypoint.sh ./entrypoint.sh
# NOTE(review): this places docker/.env inside the image; confirm the file
# holds no secrets before keeping this line.
ADD docker/.env ./
COPY docker/entrypoint.sh ./entrypoint.sh
# Make the script executable so the exec-form ENTRYPOINT can run it directly.
RUN chmod +x ./entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]
ENTRYPOINT ["./entrypoint.sh"]
51 changes: 43 additions & 8 deletions docs/guides/develop/build_docker_image.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,17 +36,52 @@ cd ragflow

### Build the Docker Image

Navigate to the `ragflow` directory where the Dockerfile and other necessary files are located. Now you can build the Docker image using the provided Dockerfile. The command below specifies which Dockerfile to use and tags the image with a name for reference purpose.
Navigate to the `ragflow` directory where the Dockerfile and other necessary files are located. Now you can build the Docker image using the provided Dockerfile. The command below specifies which Dockerfile to use and tags the image with a name for reference purpose.

#### Build image `ragflow:dev-slim`
#### Build and push multi-arch image `ragflow:dev-slim`

On a `linux/amd64` host:
```bash
docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim-amd64 .
docker push infiniflow/ragflow:dev-slim-amd64
```

On a `linux/arm64` host:
```bash
docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim-arm64 .
docker push infiniflow/ragflow:dev-slim-arm64
```

On any linux host:
```bash
docker manifest create infiniflow/ragflow:dev-slim --amend infiniflow/ragflow:dev-slim-amd64 --amend infiniflow/ragflow:dev-slim-arm64
docker manifest push infiniflow/ragflow:dev-slim
```

This image's size is about 1 GB. It relies on external LLM services since it doesn't contain embedding models.

#### Build and push multi-arch image `ragflow:dev`

On a `linux/amd64` host:
```bash
docker build -f Dockerfile.slim -t infiniflow/ragflow:dev-slim .
pip3 install huggingface-hub
python3 download_deps.py
docker build -f Dockerfile -t infiniflow/ragflow:dev-amd64 .
docker push infiniflow/ragflow:dev-amd64
```
This image's size is about 1GB. It relies on external LLM services since it doesn't contain embedding models.

#### Build image `ragflow:dev`
On a `linux/arm64` host:
```bash
cd ragflow/
docker build -f Dockerfile -t infiniflow/ragflow:dev .
pip3 install huggingface-hub
python3 download_deps.py
docker build -f Dockerfile -t infiniflow/ragflow:dev-arm64 .
docker push infiniflow/ragflow:dev-arm64
```
This image's size is about 11GB. It contains embedding models, and can run inference via local CPU/GPU or external LLM services.

On any linux host:
```bash
docker manifest create infiniflow/ragflow:dev --amend infiniflow/ragflow:dev-amd64 --amend infiniflow/ragflow:dev-arm64
docker manifest push infiniflow/ragflow:dev
```

This image's size is about 11 GB. It contains embedding models, and can run inference via local CPU/GPU or external LLM services.
26 changes: 26 additions & 0 deletions download_deps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env python3

from huggingface_hub import snapshot_download
import os

# Hugging Face repository ids ("<owner>/<name>") to download; each one is
# stored under huggingface.io/<owner>/<name> by download_model().
repos = [
"InfiniFlow/text_concat_xgb_v1.0",
"InfiniFlow/deepdoc",
"BAAI/bge-large-zh-v1.5",
"BAAI/bge-reranker-v2-m3",
"jinaai/jina-embeddings-v3",
"jinaai/jina-reranker-v2-base-multilingual",
"maidalun1020/bce-embedding-base_v1",
"maidalun1020/bce-reranker-base_v1",
]


def download_model(repo_id):
    """Download a Hugging Face repository snapshot into ./huggingface.io/<repo_id>.

    Args:
        repo_id: Repository id of the form "<owner>/<name>".

    The target directory is created if it does not exist yet; the path is
    relative to the current working directory.
    """
    target_dir = os.path.join("huggingface.io", repo_id)
    os.makedirs(target_dir, exist_ok=True)
    snapshot_download(repo_id=repo_id, local_dir=target_dir)


if __name__ == "__main__":
    # Script entry point: fetch every repository listed in `repos`, in order.
    for repo_id in repos:
        download_model(repo_id)
38 changes: 0 additions & 38 deletions download_deps.sh

This file was deleted.

0 comments on commit c4cfd8a

Please sign in to comment.