Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework Dockerfile.scratch #2525

Merged
merged 2 commits into from
Sep 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 70 additions & 35 deletions Dockerfile.scratch
Original file line number Diff line number Diff line change
@@ -1,56 +1,91 @@
FROM ubuntu:22.04
# base stage
FROM ubuntu:24.04 AS base
USER root

WORKDIR /ragflow

RUN apt-get update && apt-get install -y wget curl build-essential libopenmpi-dev
RUN rm -f /etc/apt/apt.conf.d/docker-clean \
&& echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache

RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
bash ~/miniconda.sh -b -p /root/miniconda3 && \
rm ~/miniconda.sh && ln -s /root/miniconda3/etc/profile.d/conda.sh /etc/profile.d/conda.sh && \
echo ". /root/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc && \
echo "conda activate base" >> ~/.bashrc
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt update && apt-get --no-install-recommends install -y ca-certificates

ENV PATH /root/miniconda3/bin:$PATH
# if you located in China, you can use tsinghua mirror to speed up apt
RUN sed -i 's|http://archive.ubuntu.com|https://mirrors.tuna.tsinghua.edu.cn|g' /etc/apt/sources.list.d/ubuntu.sources

RUN conda create -y --name py11 python=3.11
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt update && apt install -y curl libpython3-dev nginx openmpi-bin openmpi-common libopenmpi-dev libglib2.0-0 libglx-mesa0 \
&& rm -rf /var/lib/apt/lists/* \
&& curl -sSL https://install.python-poetry.org | python3 -

ENV CONDA_DEFAULT_ENV py11
ENV CONDA_PREFIX /root/miniconda3/envs/py11
ENV PATH $CONDA_PREFIX/bin:$PATH
ENV PYTHONDONTWRITEBYTECODE=1 LD_LIBRARY_PATH=usr/lib/x86_64-linux-gnu/openmpi/lib:$LD_LIBRARY_PATH

RUN curl -sL https://deb.nodesource.com/setup_14.x | bash -
RUN apt-get install -y nodejs
# Configure Poetry
ENV POETRY_NO_INTERACTION=1
ENV POETRY_VIRTUALENVS_IN_PROJECT=true
ENV POETRY_VIRTUALENVS_CREATE=true
ENV POETRY_REQUESTS_TIMEOUT=15

RUN apt-get install -y nginx
# builder stage
FROM base AS builder
USER root

WORKDIR /ragflow

RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt update && apt install -y nodejs npm && \
rm -rf /var/lib/apt/lists/*

# if you located in China, you can use taobao registry to speed up npm and yarn
RUN npm config set registry https://registry.npmmirror.com/

# https://yarnpkg.com/getting-started/install
COPY web web
RUN cd web && npm install -g corepack && corepack enable && yarn install && yarn run build

ADD ./web ./web
ADD ./api ./api
ADD ./conf ./conf
ADD ./deepdoc ./deepdoc
ADD ./rag ./rag
ADD ./requirements.txt ./requirements.txt
ADD ./agent ./agent
ADD ./graphrag ./graphrag
# install dependencies from poetry.lock file
COPY pyproject.toml poetry.toml poetry.lock ./
RUN --mount=type=cache,target=/root/.cache/pypoetry,sharing=locked \
/root/.local/bin/poetry install --sync --no-cache --no-root

RUN apt install openmpi-bin openmpi-common libopenmpi-dev
ENV LD_LIBRARY_PATH /usr/lib/x86_64-linux-gnu/openmpi/lib:$LD_LIBRARY_PATH
RUN rm /root/miniconda3/envs/py11/compiler_compat/ld
RUN cd ./web && npm i --force && npm run build
RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ -r ./requirements.txt
# production stage
FROM base AS production
USER root

WORKDIR /ragflow

RUN apt-get update && \
apt-get install -y libglib2.0-0 libgl1-mesa-glx && \
# Install python packages' dependencies
# cv2 requires libGL.so.1
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
apt update && apt install -y --no-install-recommends nginx libgl1 vim less && \
rm -rf /var/lib/apt/lists/*

RUN conda run -n py11 pip install -i https://mirrors.aliyun.com/pypi/simple/ ollama
RUN conda run -n py11 python -m nltk.downloader punkt
RUN conda run -n py11 python -m nltk.downloader wordnet
COPY web web
COPY api api
COPY conf conf
COPY deepdoc deepdoc
COPY rag rag
COPY agent agent
COPY graphrag graphrag
COPY pyproject.toml poetry.toml poetry.lock ./

# Copy compiled web pages
COPY --from=builder /ragflow/web/dist /ragflow/web/dist

# Copy Python environment and packages
ENV VIRTUAL_ENV=/ragflow/.venv
COPY --from=builder ${VIRTUAL_ENV} ${VIRTUAL_ENV}
ENV PATH="${VIRTUAL_ENV}/bin:/root/.local/bin:${PATH}"

# Download nltk data
RUN python3 -m nltk.downloader wordnet punkt punkt_tab

# Copy models downloaded via download_deps.sh
COPY det.onnx layout.laws.onnx layout.manual.onnx layout.onnx layout.paper.onnx ocr.res rec.onnx tsr.onnx updown_concat_xgb.model /ragflow/rag/res/deepdoc/

ENV PYTHONPATH=/ragflow/
ENV HF_ENDPOINT=https://hf-mirror.com

ADD docker/entrypoint.sh ./entrypoint.sh
COPY docker/entrypoint.sh ./entrypoint.sh
RUN chmod +x ./entrypoint.sh

ENTRYPOINT ["./entrypoint.sh"]
2 changes: 1 addition & 1 deletion api/ragflow_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def update_progress():


if __name__ == '__main__':
print("""
print(r"""
____ ______ __
/ __ \ ____ _ ____ _ / ____// /____ _ __
/ /_/ // __ `// __ `// /_ / // __ \| | /| / /
Expand Down
5 changes: 4 additions & 1 deletion docker/.env
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,13 @@ REDIS_PASSWORD=infini_rag_flow

SVR_HTTP_PORT=9380

RAGFLOW_VERSION=dev
RAGFLOW_VERSION=poetry

TIMEZONE='Asia/Shanghai'

# Inside GFW, we need the following huggingface.co mirror:
HF_ENDPOINT=https://hf-mirror.com

######## OS setup for ES ###########
# sysctl vm.max_map_count
# sudo sysctl -w vm.max_map_count=262144
Expand Down
3 changes: 2 additions & 1 deletion docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ services:
- ${SVR_HTTP_PORT}:9380
- 80:80
- 443:443
- 5678:5678
volumes:
- ./service_conf.yaml:/ragflow/conf/service_conf.yaml
- ./ragflow-logs:/ragflow/logs
Expand All @@ -23,7 +24,7 @@ services:
- ./nginx/nginx.conf:/etc/nginx/nginx.conf
environment:
- TZ=${TIMEZONE}
- HF_ENDPOINT=https://huggingface.co
- HF_ENDPOINT=${HF_ENDPOINT}
- MACOS=${MACOS}
networks:
- ragflow
Expand Down
38 changes: 38 additions & 0 deletions download_deps.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#!/usr/bin/env bash

download()
{
echo "download $1"
# https://stackoverflow.com/questions/3162385/how-to-split-a-string-in-shell-and-get-the-last-field
fn=${1##*/}
if [ ! -f $fn ] ; then
wget --no-check-certificate $1
fi
}

# https://stackoverflow.com/questions/24628076/convert-multiline-string-to-array
names="https://huggingface.co/InfiniFlow/deepdoc/resolve/main/det.onnx
https://huggingface.co/InfiniFlow/deepdoc/resolve/main/layout.laws.onnx
https://huggingface.co/InfiniFlow/deepdoc/resolve/main/layout.manual.onnx
https://huggingface.co/InfiniFlow/deepdoc/resolve/main/layout.onnx
https://huggingface.co/InfiniFlow/deepdoc/resolve/main/layout.paper.onnx
https://huggingface.co/InfiniFlow/deepdoc/resolve/main/ocr.res
https://huggingface.co/InfiniFlow/deepdoc/resolve/main/rec.onnx
https://huggingface.co/InfiniFlow/deepdoc/resolve/main/tsr.onnx
https://huggingface.co/InfiniFlow/text_concat_xgb_v1.0/resolve/main/updown_concat_xgb.model"

SAVEIFS=$IFS # Save current IFS (Internal Field Separator)
IFS=$'\n' # Change IFS to newline char
names=($names) # split the `names` string into an array by the same name
IFS=$SAVEIFS # Restore original IFS

find . -size 0 | xargs rm -f
# https://stackoverflow.com/questions/15466808/shell-iterate-over-array
for ((i=0; i<${#names[@]}; i+=1)); do
url="${names[$i]}"
download $url
if [ $? != 0 ]; then
exit -1
fi
done
find . -size 0 | xargs rm -f
Loading