Skip to content

Commit

Permalink
fix(api): make migration columns nullable (#139)
Browse files Browse the repository at this point in the history
This PR:
- makes the new document columns nullable
- upgrades the rag transformers and tokenizers packages to fix the error reported in
huggingface/transformers#31789

Part of #108
  • Loading branch information
mawandm authored Jul 5, 2024
1 parent 0df8758 commit 2817f0e
Show file tree
Hide file tree
Showing 5 changed files with 11 additions and 12 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/test_rag.yml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ jobs:
sudo apt update -y
sudo apt install ffmpeg tesseract-ocr poppler-utils -y
pip install -r nesis/rag/requirements.txt -r nesis/rag/requirements-test.txt -r nesis/rag/requirements-huggingface.txt
pip install -U transformers
pip install -U tokenizers
- name: Run unit tests
env:
NESIS_MEMCACHE_HOSTS: localhost:11211
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,11 @@ def upgrade() -> None:
op.add_column(
"document",
sa.Column(
"status",
sa.Enum("SUCCESS", "PROCESSING", "ERROR", name="document_status"),
nullable=False,
"status", sa.Enum("SUCCESS", "PROCESSING", "ERROR", name="document_status")
),
)
op.add_column("document", sa.Column("last_modified", sa.DateTime(), nullable=False))
op.add_column(
"document", sa.Column("last_processed", sa.DateTime(), nullable=False)
)
op.add_column("document", sa.Column("last_modified", sa.DateTime()))
op.add_column("document", sa.Column("last_processed", sa.DateTime()))
op.add_column(
"document", sa.Column("last_processed_message", sa.Text(), nullable=True)
)
Expand Down
5 changes: 2 additions & 3 deletions nesis/api/core/models/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,10 @@ class Document(Base):
store_metadata = Column(JSONB)
status = Column(
Enum(objects.DocumentStatus, name="document_status"),
nullable=False,
default=objects.DocumentStatus.PROCESSING,
)
last_modified = Column(DateTime, default=dt.datetime.utcnow, nullable=False)
last_processed = Column(DateTime, default=dt.datetime.utcnow, nullable=False)
last_modified = Column(DateTime, default=dt.datetime.utcnow)
last_processed = Column(DateTime, default=dt.datetime.utcnow)
last_processed_message = Column(Text)

__table_args__ = (
Expand Down
1 change: 1 addition & 0 deletions nesis/rag/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ RUN if [ "$CORE" = "cuda" ] ; \
--default-timeout=1200 ; \
fi

RUN /app/.venv/bin/pip install -U transformers tokenizers


FROM python:3.11.6-slim-bookworm
Expand Down
5 changes: 3 additions & 2 deletions nesis/rag/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ Werkzeug==3.0.1
pandas==2.2.1

injector==0.21.0
llama-index==0.10.23
llama-index==0.10.52
llama-index-llms-openai-like==0.1.3
llama-index-readers-json==0.1.5
llama-index-vector-stores-postgres==0.1.4.post1
llama-index-vector-stores-chroma==0.1.6
llama-index-vector-stores-qdrant==0.1.4
llama-index-readers-file==0.1.12
llama-index-llms-openai==0.1.12
#llama-index-llms-openai==0.1.12

boto3==1.34.75

Expand Down Expand Up @@ -52,3 +52,4 @@ pillow_heif==0.16.0

# This package conflicts with onnxruntime, so we attempt to install it last. It is deliberately not pinned to a version so that pip can resolve a compatible one.
llama-index-embeddings-fastembed

0 comments on commit 2817f0e

Please sign in to comment.